From c98a04784e47115475b884f638a67577daaa6ff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sel=C3=A7uk=20Ar=C4=B1bal=C4=B1?= Date: Thu, 2 Apr 2020 21:01:04 +0300 Subject: [PATCH] Fix KubernetesPodNotHealthy alert kube-state-metrics sets the kube_pod_status_phase series for a pod's current phase to 1, so the "Kubernetes Pod not healthy" query is fixed to match only series whose value is 1. --- _data/rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_data/rules.yml b/_data/rules.yml index 7b3d427..a5ff36e 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -986,7 +986,7 @@ groups: severity: warning - name: Kubernetes Pod not healthy description: Pod has been in a non-ready state for longer than an hour. - query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:])' + query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"} == 1)[1h:])' severity: error - name: Kubernetes pod crash looping description: Pod {{ $labels.pod }} is crash looping