From 648b83250a94eeb4ccc3ccd1c8c0a36358dcc347 Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Sun, 3 May 2020 18:01:25 +0200 Subject: [PATCH] improve accuracy "Kubernetes Pod not healthy" query --- _data/rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_data/rules.yml b/_data/rules.yml index b1f5847..d491c7f 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -986,7 +986,7 @@ groups: severity: warning - name: Kubernetes Pod not healthy description: Pod has been in a non-ready state for longer than an hour. - query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) == 1' + query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) > 0' severity: error - name: Kubernetes pod crash looping description: Pod {{ $labels.pod }} is crash looping