This commit is contained in:
samber 2024-02-25 00:32:13 +00:00
parent 46b9ccf057
commit 45a711f921

View file

@@ -211,13 +211,13 @@ groups:
summary: Host node overtemperature alarm (instance {{ $labels.instance }}) summary: Host node overtemperature alarm (instance {{ $labels.instance }})
description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostSoftwareRaidIsNotActive - alert: HostSoftwareRaidInsufficientDrives
expr: '(node_md_disks_required - on(device, instance) node_md_disks{state="active"}) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}' expr: '((node_md_disks_required - on(device, instance) node_md_disks{state="active"}) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
for: 0m for: 0m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: Host Software RAID is not active (instance {{ $labels.instance }}) summary: Host Software RAID insufficient drives (instance {{ $labels.instance }})
description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} has insufficient drives remaining.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} has insufficient drives remaining.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostSoftwareRaidDiskFailure - alert: HostSoftwareRaidDiskFailure
@@ -230,7 +230,7 @@ groups:
description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} needs attention.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} needs attention.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostKernelVersionDeviations - alert: HostKernelVersionDeviations
expr: 'changes(node_uname_info[1h]) == 0' expr: 'changes(node_uname_info[1h]) > 0'
for: 6h for: 6h
labels: labels:
severity: info severity: info