From 18672ff0f9803434c913f6569b6a4c06d3c76104 Mon Sep 17 00:00:00 2001
From: Gustavo Kazuo Motizuki
Date: Sun, 2 May 2021 05:27:46 +1200
Subject: [PATCH] Improve KubernetesOutOfCapacity alert (#211)

---
NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. Line structure is restored here; the
YAML indentation of the hunk lines is reconstructed on the assumption that
rule entries are nested under groups > services > exporters > rules.
Confirm against _data/rules.yml at blob 7589e43 before applying.

What the change does: the old query counted every series of kube_pod_info
per node; the new query counts only pods whose kube_pod_status_phase
"Running" sample equals 1, and joins on (pod, namespace) with
`0 * kube_pod_info` solely to copy the `node` label onto those series.

 _data/rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 7589e43..6f9c4bb 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1498,7 +1498,7 @@ groups:
                 for: 2m
               - name: Kubernetes out of capacity
                 description: "{{ $labels.node }} is out of capacity"
-                query: 'sum(kube_pod_info) by (node) / sum(kube_node_status_allocatable_pods) by (node) * 100 > 90'
+                query: 'sum by (node) ((kube_pod_status_phase{phase="Running"} == 1) + on(pod, namespace) group_left(node) (0 * kube_pod_info)) / sum(kube_node_status_allocatable_pods) by (node) * 100 > 90'
                 severity: warning
                 for: 2m
               - name: Kubernetes Job failed