mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 00:47:18 +08:00
SMART monitoring (#402)
* SMART monitoring * query regex fix --------- Co-authored-by: Marek Cervenka <cervenka@ipex.cz>
This commit is contained in:
parent
0727f2ef2e
commit
4eb0e910e7
1 changed file with 32 additions and 0 deletions
|
|
@ -333,6 +333,38 @@ groups:
|
|||
severity: info
|
||||
for: 4h
|
||||
|
||||
# Alert rules for disk health as reported by smartctl-exporter.
- name: S.M.A.R.T Device Monitoring
  exporters:
    - name: smartctl-exporter
      slug: smartctl-exporter
      doc_url: https://github.com/prometheus-community/smartctl_exporter
      rules:
        # The temperature rules select only the current reading. The exporter
        # publishes several series under smartctl_device_temperature,
        # distinguished by the temperature_type label (e.g. threshold values
        # such as drive_trip); without the matcher a threshold series above
        # the limit would keep the alert firing permanently.
        # NOTE(review): confirm the temperature_type values against the
        # deployed exporter version.
        - name: Smart device temperature warning
          description: Device temperature warning (instance {{ $labels.instance }})
          query: smartctl_device_temperature{temperature_type="current"} > 60
          severity: warning
          for: 2m
        - name: Smart device temperature critical
          description: Device temperature critical (instance {{ $labels.instance }})
          query: smartctl_device_temperature{temperature_type="current"} > 80
          severity: critical
          for: 2m
        # Fires when the device reports a non-zero critical-warning value.
        - name: Smart critical warning
          description: device has critical warning (instance {{ $labels.instance }})
          query: smartctl_device_critical_warning > 0
          severity: critical
          for: 15m
        # Fires when the device reports any media errors.
        - name: Smart media errors
          description: device has media errors (instance {{ $labels.instance }})
          query: smartctl_device_media_errors > 0
          severity: critical
          for: 15m
        # Compares the available spare against the device's own threshold;
        # both sides are limited to devices whose name starts with "nvme".
        - name: Smart NVME Wearout Indicator
          description: NVMe device is wearing out (instance {{ $labels.instance }})
          query: smartctl_device_available_spare{device=~"nvme.*"} < smartctl_device_available_spare_threshold{device=~"nvme.*"}
          severity: critical
          for: 15m
|
||||
|
||||
- name: Docker containers
|
||||
exporters:
|
||||
- name: google/cAdvisor
|
||||
|
|
|
|||
Loading…
Reference in a new issue