SMART monitoring (#402)

* SMART monitoring

* query regex fix

---------

Co-authored-by: Marek Cervenka <cervenka@ipex.cz>
This commit is contained in:
Marek Červenka 2024-02-09 20:23:30 +01:00 committed by GitHub
parent 0727f2ef2e
commit 4eb0e910e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -333,6 +333,38 @@ groups:
severity: info
for: 4h
# Service entry: alert rules for disk health as reported through S.M.A.R.T.
# Every rule is built on metrics whose names start with `smartctl_device_`.
- name: S.M.A.R.T Device Monitoring
  exporters:
    - name: smartctl-exporter
      slug: smartctl-exporter
      doc_url: https://github.com/prometheus-community/smartctl_exporter
      rules:
        # smartctl_exporter publishes several series per device under
        # smartctl_device_temperature, told apart by the `temperature_type`
        # label (the live reading plus static limits such as the drive trip
        # point). Only the live reading may be compared against a threshold;
        # otherwise a static limit above 60 keeps the alert firing forever.
        # NOTE(review): label values follow the exporter docs - confirm them
        # against the exporter version that is actually deployed.
        - name: Smart device temperature warning
          description: Device temperature warning (instance {{ $labels.instance }})
          query: smartctl_device_temperature{temperature_type="current"} > 60
          severity: warning
          for: 2m
        # Same selector restriction as the warning rule, higher threshold.
        - name: Smart device temperature critical
          description: Device temperature critical (instance {{ $labels.instance }})
          query: smartctl_device_temperature{temperature_type="current"} > 80
          severity: critical
          for: 2m
        # Fires when the critical-warning metric stays non-zero for 15 minutes.
        - name: Smart critical warning
          description: device has critical warning (instance {{ $labels.instance }})
          query: smartctl_device_critical_warning > 0
          severity: critical
          for: 15m
        # Fires when the media-error metric stays above zero for 15 minutes.
        - name: Smart media errors
          description: device has media errors (instance {{ $labels.instance }})
          query: smartctl_device_media_errors > 0
          severity: critical
          for: 15m
        # Compares the available-spare metric with its threshold metric; both
        # sides are limited to devices whose `device` label starts with "nvme".
        - name: Smart NVME Wearout Indicator
          description: NVMe device is wearing out (instance {{ $labels.instance }})
          query: smartctl_device_available_spare{device=~"nvme.*"} < smartctl_device_available_spare_threshold{device=~"nvme.*"}
          severity: critical
          for: 15m
- name: Docker containers
exporters:
- name: google/cAdvisor