From 135d4b7c1a21b6a6fc0f88957663b6d8e0d28a3a Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Sat, 1 May 2021 20:30:23 +0200 Subject: [PATCH] fix(data): for KubernetesPodNotHealthy, use an explicit 1m subquery step over a 15m window --- _data/rules.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index 539307a..b58e6b5 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -1551,8 +1551,8 @@ groups: severity: info for: 2m - name: Kubernetes Pod not healthy - description: Pod has been in a non-ready state for longer than an hour. - query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) > 0' + description: Pod has been in a non-ready state for longer than 15 minutes. + query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:1m]) > 0' severity: critical - name: Kubernetes pod crash looping description: Pod {{ $labels.pod }} is crash looping