From c87732377ab64442e2fdefc3a3d362413eb0d3c4 Mon Sep 17 00:00:00 2001
From: "marco.verleun"
Date: Fri, 30 Apr 2021 15:08:16 +0200
Subject: [PATCH] Add OOMKill alert

---
Reviewer notes (this free-text area is ignored when the patch is applied):

* Purpose: adds a "Kubernetes Pod OOM killed" rule next to the existing
  Kubernetes rules in _data/rules.yml.
* severity is lowercase "critical" so it matches the casing of the
  neighbouring rule in this hunk ("severity: info").
* The query is single-quoted like the neighbouring queries in this hunk; the
  parsed value is unchanged.
* NOTE(review): the YAML indentation below was reconstructed from a
  whitespace-collapsed copy of this patch. Confirm it against
  _data/rules.yml before applying.
* NOTE(review): the post-image blob id on the "index" line was computed for
  the original added lines and no longer matches after the two edits above.
  Regenerate the patch if you rely on 3-way application.
* NOTE(review): the rule has no "for:" and the expression compares a
  last-terminated-reason series with "> 0", so it presumably keeps firing
  for as long as that series reports OOMKilled. Consider correlating it
  with a recent restart-count increase. TODO confirm against the exporter
  docs.

 _data/rules.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 6fd9802..37715cb 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1546,6 +1546,10 @@ groups:
                 query: 'kube_hpa_status_desired_replicas >= kube_hpa_spec_max_replicas'
                 severity: info
                 for: 2m
+              - name: Kubernetes Pod OOM killed
+                description: Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} is OOMKilled
+                query: 'kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} > 0'
+                severity: critical
               - name: Kubernetes Pod not healthy
                 description: Pod has been in a non-ready state for longer than an hour.
                 query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) > 0'