From 0dba950ccc7935cfb4b89b0751e6256dddaacb87 Mon Sep 17 00:00:00 2001
From: samber
Date: Fri, 9 Feb 2024 19:25:17 +0000
Subject: [PATCH] Publish

---
Review notes (this area is ignored when the patch is applied):
  * Both temperature rules now select temperature_type="current" so that
    static limit series published under the same metric name cannot
    trigger them.
  * YAML comments were added to the rules file; the hunk length and the
    diffstat below reflect the resulting 57 lines.
  * The "index" line still carries the blob id of the originally published
    file; regenerate the patch if an exact blob id is required.

 .../smartctl-exporter.yml                     | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 dist/rules/s.m.a.r.t-device-monitoring/smartctl-exporter.yml

diff --git a/dist/rules/s.m.a.r.t-device-monitoring/smartctl-exporter.yml b/dist/rules/s.m.a.r.t-device-monitoring/smartctl-exporter.yml
new file mode 100644
index 0000000..1946c38
--- /dev/null
+++ b/dist/rules/s.m.a.r.t-device-monitoring/smartctl-exporter.yml
@@ -0,0 +1,57 @@
+groups:
+
+- name: SmartctlExporter
+
+  rules:
+
+    # Compare only the live reading: the exporter also exposes static limits
+    # (e.g. temperature_type="drive_trip") under this metric name, and those
+    # fixed values can exceed the threshold and keep the alert firing.
+    - alert: SmartDeviceTemperatureWarning
+      expr: 'smartctl_device_temperature{temperature_type="current"} > 60'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Smart device temperature warning (instance {{ $labels.instance }})
+        description: "Device temperature warning (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    # Live reading only (static limit series excluded); critical threshold.
+    - alert: SmartDeviceTemperatureCritical
+      expr: 'smartctl_device_temperature{temperature_type="current"} > 80'
+      for: 2m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart device temperature critical (instance {{ $labels.instance }})
+        description: "Device temperature critical (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    # Fires when the critical-warning value stays above zero for 15 minutes.
+    - alert: SmartCriticalWarning
+      expr: 'smartctl_device_critical_warning > 0'
+      for: 15m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart critical warning (instance {{ $labels.instance }})
+        description: "device has critical warning (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    # Fires when the media error value stays above zero for 15 minutes.
+    - alert: SmartMediaErrors
+      expr: 'smartctl_device_media_errors > 0'
+      for: 15m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart media errors (instance {{ $labels.instance }})
+        description: "device has media errors (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    # NVMe devices only: available spare is below the reported threshold.
+    - alert: SmartNvmeWearoutIndicator
+      expr: 'smartctl_device_available_spare{device=~"nvme.*"} < smartctl_device_available_spare_threshold{device=~"nvme.*"}'
+      for: 15m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart NVME Wearout Indicator (instance {{ $labels.instance }})
+        description: "NVMe device is wearing out (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"