Update rules.yml

This commit is contained in:
Samuel Berthe 2024-08-28 08:46:41 +02:00 committed by GitHub
parent 3bf8d6d824
commit 995ab4d27a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -267,7 +267,7 @@ groups:
for: 5m
- name: Host node overtemperature alarm
description: "Physical node temperature alarm triggered"
-query: '(node_hwmon_temp_crit_alarm_celsius == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+query: '((node_hwmon_temp_crit_alarm_celsius == 1) or (node_hwmon_temp_alarm == 1)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
severity: critical
- name: Host RAID array got inactive
description: "RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically."