From 2c341445db6e645254eb924591059824068dc7dc Mon Sep 17 00:00:00 2001
From: Samuel Berthe
Date: Fri, 30 Jan 2026 12:14:32 +0100
Subject: [PATCH] Update rules.yml

---
NOTE(review): line breaks and hunk indentation were restored from a
whitespace-collapsed copy of this patch. The leading spaces on the hunk
lines are assumed, not known -- confirm the context lines match
_data/rules.yml before applying.

 _data/rules.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 1bb6ece..7f4da58 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -273,6 +273,8 @@ groups:
                 description: OOM kill detected
                 query: "(increase(node_vmstat_oom_kill[30m]) > 0)"
                 severity: warning
+                comments: |
+                  When a machine runs out of memory, the node exporter can become unresponsive for several minutes. Even if the system takes 15–20 minutes to recover, the alert should still trigger.
               - name: Host EDAC Correctable Errors detected
                 description: 'Host {{ $labels.instance }} has had {{ printf "%.0f" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'
                 query: "(increase(node_edac_correctable_errors_total[1m]) > 0)"