mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-26 19:37:27 +08:00
Minor bug fixes
This commit is contained in:
parent
d6ef8e7449
commit
87ee1292e7
1 changed file with 3 additions and 3 deletions
|
|
@@ -262,9 +262,9 @@ groups:
|
||||||
description: "Physical node temperature alarm triggered"
|
description: "Physical node temperature alarm triggered"
|
||||||
query: '(node_hwmon_temp_crit_alarm_celsius == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
|
query: '(node_hwmon_temp_crit_alarm_celsius == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: Host Software RAID is not active
|
- name: Host Software RAID insufficient drives
|
||||||
description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} has insufficient drives remaining."
|
description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} has insufficient drives remaining."
|
||||||
query: '(node_md_disks_required - on(device, instance) node_md_disks{state="active"}) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
|
query: '((node_md_disks_required - on(device, instance) node_md_disks{state="active"}) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: Host Software RAID disk failure
|
- name: Host Software RAID disk failure
|
||||||
description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} needs attention."
|
description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} needs attention."
|
||||||
|
|
@@ -273,7 +273,7 @@ groups:
|
||||||
for: 2m
|
for: 2m
|
||||||
- name: Host kernel version deviations
|
- name: Host kernel version deviations
|
||||||
description: Kernel version for {{ $labels.instance }} has changed
|
description: Kernel version for {{ $labels.instance }} has changed
|
||||||
query: 'changes(node_uname_info[1h]) == 0'
|
query: 'changes(node_uname_info[1h]) > 0'
|
||||||
severity: info
|
severity: info
|
||||||
for: 6h
|
for: 6h
|
||||||
- name: Host OOM kill detected
|
- name: Host OOM kill detected
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue