From fc6b3faadc299610634e79c7174d44150ee5574f Mon Sep 17 00:00:00 2001
From: Samuel Berthe
Date: Tue, 28 Jan 2025 06:04:10 +0100
Subject: [PATCH] Fix from #405

---
Review notes (this area is ignored by `git am`):
- Adds three smartctl alert rules: temperature at/over the drive trip value,
  temperature at/over 80% of the trip value, and SMART status failure.
- The unbalanced ")" that trailed {{ $labels.device }} in the three added
  descriptions has been removed. The same typo on the unchanged
  "SMART critical warning" context line is left alone so the hunk still
  matches the target file; fix it in a separate change.
- NOTE(review): this patch was reflowed from a single-line paste; the YAML
  indentation below is reconstructed - confirm against _data/rules.yml.
  The "index" post-image hash predates the description fix.

 _data/rules.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 88e0785..6ad5f9d 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -331,6 +331,18 @@ groups:
                 description: Device temperature critical on {{ $labels.instance }} drive {{ $labels.device }} over 70°C
                 query: '(max_over_time(smartctl_device_temperature{temperature_type="current"} [5m]) unless on (instance, device) smartctl_device_temperature{temperature_type="drive_trip"}) > 70'
                 severity: critical
+              - name: SMART device temperature over trip value
+                description: Device temperature over trip value on {{ $labels.instance }} drive {{ $labels.device }}
+                query: 'max_over_time(smartctl_device_temperature{temperature_type="current"} [10m]) >= on(device, instance) smartctl_device_temperature{temperature_type="drive_trip"}'
+                severity: critical
+              - name: SMART device temperature nearing trip value
+                description: Device temperature at 80% of trip value on {{ $labels.instance }} drive {{ $labels.device }}
+                query: 'max_over_time(smartctl_device_temperature{temperature_type="current"} [10m]) >= on(device, instance) (smartctl_device_temperature{temperature_type="drive_trip"} * .80)'
+                severity: warning
+              - name: SMART status
+                description: Device has a SMART status failure on {{ $labels.instance }} drive {{ $labels.device }}
+                query: 'smartctl_device_smart_status != 1'
+                severity: critical
               - name: SMART critical warning
                 description: Disk controller has critical warning on {{ $labels.instance }} drive {{ $labels.device }})
                 query: 'smartctl_device_critical_warning > 0'