From 995ab4d27a5cdaa8045f1103f15a7161c4b245b3 Mon Sep 17 00:00:00 2001
From: Samuel Berthe
Date: Wed, 28 Aug 2024 08:46:41 +0200
Subject: [PATCH] Update rules.yml

---
Note (ignored by git am): the "Host node overtemperature alarm" query now
matches when either node_hwmon_temp_crit_alarm_celsius == 1 or
node_hwmon_temp_alarm == 1; previously only the first metric was checked.
Leading whitespace of the hunk lines was reconstructed - confirm against
_data/rules.yml or apply with --ignore-whitespace.

 _data/rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 6f5d04d..f5ac4bf 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -267,7 +267,7 @@ groups:
                 for: 5m
               - name: Host node overtemperature alarm
                 description: "Physical node temperature alarm triggered"
-                query: '(node_hwmon_temp_crit_alarm_celsius == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+                query: '((node_hwmon_temp_crit_alarm_celsius == 1) or (node_hwmon_temp_alarm == 1)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
                 severity: critical
               - name: Host RAID array got inactive
                 description: "RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically."