From a5f32a0fab398e7096c6f4d551509154996b5208 Mon Sep 17 00:00:00 2001 From: MatthieuFin Date: Sun, 4 Dec 2022 23:08:24 +0100 Subject: [PATCH 1/2] fix(rule): fixing KubernetesPodNotHealthy (#215 #253) (#263) --- _data/rules.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_data/rules.yml b/_data/rules.yml index 654423e..d1251c9 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -1692,8 +1692,9 @@ groups: for: 2m - name: Kubernetes Pod not healthy description: Pod has been in a non-ready state for longer than 15 minutes. - query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:1m]) > 0' + query: 'sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"}) > 0' severity: critical + for: 15m - name: Kubernetes pod crash looping description: Pod {{ $labels.pod }} is crash looping query: 'increase(kube_pod_container_status_restarts_total[1m]) > 3' From a4735f3512f345f382471b979014ee17a792cd11 Mon Sep 17 00:00:00 2001 From: samber Date: Sun, 4 Dec 2022 22:08:59 +0000 Subject: [PATCH 2/2] Publish --- dist/rules/kubernetes/kubestate-exporter.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml index ce8fb1b..3c3a1ae 100644 --- a/dist/rules/kubernetes/kubestate-exporter.yml +++ b/dist/rules/kubernetes/kubestate-exporter.yml @@ -149,8 +149,8 @@ groups: description: "The maximum number of desired Pods has been hit\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: KubernetesPodNotHealthy - expr: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:1m]) > 0' - for: 0m + expr: 'sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"}) > 0' + for: 15m labels: severity: critical annotations: