mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-27 03:47:07 +08:00
Merge branch 'samber:master' into master
This commit is contained in:
commit
17d51930ac
3 changed files with 147 additions and 0 deletions
|
|
@ -2089,6 +2089,30 @@ groups:
|
||||||
query: 'node_zfs_zpool_state{state!="online"} > 0'
|
query: 'node_zfs_zpool_state{state!="online"} > 0'
|
||||||
severity: critical
|
severity: critical
|
||||||
for: 1m
|
for: 1m
|
||||||
|
- name: ZFS exporter
|
||||||
|
slug: zfs_exporter
|
||||||
|
doc_url: https://github.com/pdf/zfs_exporter
|
||||||
|
rules:
|
||||||
|
- name: ZFS pool out of space
|
||||||
|
description: Disk is almost full (< 10% left)
|
||||||
|
query: 'zfs_pool_free_bytes * 100 / zfs_pool_size_bytes < 10 and ON (instance, pool) zfs_pool_readonly == 0'
|
||||||
|
severity: warning
|
||||||
|
- name: ZFS pool unhealthy
|
||||||
|
description: ZFS pool state is {{ $value }}. See comments for more information.
|
||||||
|
query: 'zfs_pool_health > 0'
|
||||||
|
severity: critical
|
||||||
|
comments: |
|
||||||
|
0: ONLINE
|
||||||
|
1: DEGRADED
|
||||||
|
2: FAULTED
|
||||||
|
3: OFFLINE
|
||||||
|
4: UNAVAIL
|
||||||
|
5: REMOVED
|
||||||
|
6: SUSPENDED
|
||||||
|
- name: ZFS collector failed
|
||||||
|
description: ZFS collector for {{ $labels.instance }} has failed to collect information
|
||||||
|
query: 'zfs_scrape_collector_success != 1'
|
||||||
|
severity: warning
|
||||||
|
|
||||||
- name: OpenEBS
|
- name: OpenEBS
|
||||||
exporters:
|
exporters:
|
||||||
|
|
@ -2362,3 +2386,35 @@ groups:
|
||||||
* FAILURE 2 false - The build had a fatal error.
|
* FAILURE 2 false - The build had a fatal error.
|
||||||
* NOT_BUILT 3 false - The module was not built.
|
* NOT_BUILT 3 false - The module was not built.
|
||||||
* ABORTED 4 false - The build was manually aborted.
|
* ABORTED 4 false - The build was manually aborted.
|
||||||
|
|
||||||
|
- name: APC UPS
|
||||||
|
exporters:
|
||||||
|
- name: mdlayher/apcupsd_exporter
|
||||||
|
slug: apcupsd_exporter
|
||||||
|
doc_url: https://github.com/mdlayher/apcupsd_exporter
|
||||||
|
rules:
|
||||||
|
- name: APC UPS Battery nearly empty
|
||||||
|
description: Battery is almost empty (< 10% left)
|
||||||
|
query: 'apcupsd_battery_charge_percent < 10'
|
||||||
|
severity: critical
|
||||||
|
- name: APC UPS Less than 15 Minutes of battery time remaining
|
||||||
|
description: Battery is almost empty (< 15 Minutes remaining)
|
||||||
|
query: 'apcupsd_battery_time_left_seconds < 900'
|
||||||
|
severity: critical
|
||||||
|
- name: APC UPS AC input outage
|
||||||
|
description: UPS now running on battery (for {{$value | humanizeDuration}})
|
||||||
|
query: 'apcupsd_battery_time_on_seconds > 0'
|
||||||
|
severity: warning
|
||||||
|
- name: APC UPS low battery voltage
|
||||||
|
description: Battery voltage is lower than nominal (< 95%)
|
||||||
|
query: '(apcupsd_battery_volts / apcupsd_battery_nominal_volts) < 0.95'
|
||||||
|
severity: warning
|
||||||
|
- name: APC UPS high temperature
|
||||||
|
description: Internal temperature is high ({{$value}}°C)
|
||||||
|
query: 'apcupsd_internal_temperature_celsius >= 40'
|
||||||
|
severity: warning
|
||||||
|
for: 2m
|
||||||
|
- name: APC UPS high load
|
||||||
|
description: UPS load is > 80%
|
||||||
|
query: 'apcupsd_ups_load_percent > 80'
|
||||||
|
severity: warning
|
||||||
|
|
|
||||||
59
dist/rules/apc-ups/apcupsd_exporter.yml
vendored
Normal file
59
dist/rules/apc-ups/apcupsd_exporter.yml
vendored
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
groups:
|
||||||
|
|
||||||
|
- name: Apcupsd_exporter
|
||||||
|
|
||||||
|
rules:
|
||||||
|
|
||||||
|
- alert: ApcUpsBatteryNearlyEmpty
|
||||||
|
expr: 'apcupsd_battery_charge_percent < 10'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: APC UPS Battery nearly empty (instance {{ $labels.instance }})
|
||||||
|
description: "Battery is almost empty (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ApcUpsLessThan15MinutesOfBatteryTimeRemaining
|
||||||
|
expr: 'apcupsd_battery_time_left_seconds < 900'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: APC UPS Less than 15 Minutes of battery time remaining (instance {{ $labels.instance }})
|
||||||
|
description: "Battery is almost empty (< 15 Minutes remaining)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ApcUpsAcInputOutage
|
||||||
|
expr: 'apcupsd_battery_time_on_seconds > 0'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: APC UPS AC input outage (instance {{ $labels.instance }})
|
||||||
|
description: "UPS now running on battery (for {{$value | humanizeDuration}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ApcUpsLowBatteryVoltage
|
||||||
|
expr: '(apcupsd_battery_volts / apcupsd_battery_nominal_volts) < 0.95'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: APC UPS low battery voltage (instance {{ $labels.instance }})
|
||||||
|
description: "Battery voltage is lower than nominal (< 95%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ApcUpsHighTemperature
|
||||||
|
expr: 'apcupsd_internal_temperature_celsius >= 40'
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: APC UPS high temperature (instance {{ $labels.instance }})
|
||||||
|
description: "Internal temperature is high ({{$value}}°C)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ApcUpsHighLoad
|
||||||
|
expr: 'apcupsd_ups_load_percent > 80'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: APC UPS high load (instance {{ $labels.instance }})
|
||||||
|
description: "UPS load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
32
dist/rules/zfs/zfs_exporter.yml
vendored
Normal file
32
dist/rules/zfs/zfs_exporter.yml
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
groups:
|
||||||
|
|
||||||
|
- name: Zfs_exporter
|
||||||
|
|
||||||
|
rules:
|
||||||
|
|
||||||
|
- alert: ZfsPoolOutOfSpace
|
||||||
|
expr: 'zfs_pool_free_bytes * 100 / zfs_pool_size_bytes < 10 and ON (instance, pool) zfs_pool_readonly == 0'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: ZFS pool out of space (instance {{ $labels.instance }})
|
||||||
|
description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ZfsPoolUnhealthy
|
||||||
|
expr: 'zfs_pool_health > 0'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: ZFS pool unhealthy (instance {{ $labels.instance }})
|
||||||
|
description: "ZFS pool state is {{ $value }} (0: ONLINE, 1: DEGRADED, 2: FAULTED, 3: OFFLINE, 4: UNAVAIL, 5: REMOVED, 6: SUSPENDED).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: ZfsCollectorFailed
|
||||||
|
expr: 'zfs_scrape_collector_success != 1'
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: ZFS collector failed (instance {{ $labels.instance }})
|
||||||
|
description: "ZFS collector for {{ $labels.instance }} has failed to collect information\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
Loading…
Reference in a new issue