mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 00:47:18 +08:00
Publish
This commit is contained in:
parent
3db9281508
commit
9f6d4fd2a2
1 changed files with 77 additions and 0 deletions
77
dist/rules/snmp/snmp-exporter.yml
vendored
Normal file
77
dist/rules/snmp/snmp-exporter.yml
vendored
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
groups:
  - name: SnmpExporter

    # Alerting rules for devices scraped through the Prometheus snmp_exporter.
    # These rules use standard IF-MIB and SNMPv2-MIB metrics. Metric names depend on your snmp.yml module configuration.
    # Thresholds for bandwidth and error rates are rough defaults - adjust to your environment.

    rules:

      # From the official snmp-mixin.
      # Fires when a scrape job whose name starts with "snmp" has been failing for 5 minutes.
      - alert: SnmpTargetDown
        expr: 'up{job=~"snmp.*"} == 0'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: 'SNMP target down (instance {{ $labels.instance }})'
          description: "SNMP device {{ $labels.instance }} is unreachable.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # IF-MIB status codes: 1 = up, 2 = down.
      # ifAdminStatus is exported as its own metric by the standard if_mib module (it is not a
      # label on ifOperStatus), so the two series are joined with `and` on their shared labels.
      # The result keeps the ifOperStatus sample, so VALUE is 2 when the alert fires.
      - alert: SnmpInterfaceDown
        expr: 'ifOperStatus == 2 and ifAdminStatus == 1'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: 'SNMP interface down (instance {{ $labels.instance }})'
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} is operationally down while administratively up.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your network environment.
      # Ratio of inbound errors to received packets; the trailing `> 0` guard drops idle
      # interfaces so a zero packet rate cannot trigger the alert through division by zero.
      - alert: SnmpInterfaceHighInboundErrorRate
        expr: >-
          rate(ifInErrors[5m])
          / (rate(ifHCInUcastPkts[5m]) + rate(ifHCInBroadcastPkts[5m]) + rate(ifHCInMulticastPkts[5m]))
          > 0.05
          and (rate(ifHCInUcastPkts[5m]) + rate(ifHCInBroadcastPkts[5m]) + rate(ifHCInMulticastPkts[5m])) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'SNMP interface high inbound error rate (instance {{ $labels.instance }})'
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} has an inbound error rate above 5%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your network environment.
      # Ratio of outbound errors to transmitted packets; the trailing `> 0` guard drops idle
      # interfaces so a zero packet rate cannot trigger the alert through division by zero.
      - alert: SnmpInterfaceHighOutboundErrorRate
        expr: >-
          rate(ifOutErrors[5m])
          / (rate(ifHCOutUcastPkts[5m]) + rate(ifHCOutBroadcastPkts[5m]) + rate(ifHCOutMulticastPkts[5m]))
          > 0.05
          and (rate(ifHCOutUcastPkts[5m]) + rate(ifHCOutBroadcastPkts[5m]) + rate(ifHCOutMulticastPkts[5m])) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'SNMP interface high outbound error rate (instance {{ $labels.instance }})'
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} has an outbound error rate above 5%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your link capacity and traffic patterns.
      # Octets/s * 8 gives bits/s. ifHighSpeed is reported in units of 1,000,000 bit/s and is used
      # instead of ifSpeed because ifSpeed is a 32-bit gauge that saturates at 4,294,967,295 bit/s,
      # which would overstate utilization on 10G and faster links.
      - alert: SnmpInterfaceHighBandwidthUsageInbound
        expr: 'rate(ifHCInOctets[5m]) * 8 / (ifHighSpeed * 1000000) > 0.80 and ifHighSpeed > 0'
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: 'SNMP interface high bandwidth usage inbound (instance {{ $labels.instance }})'
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} inbound utilization is above 80%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your link capacity and traffic patterns.
      # Octets/s * 8 gives bits/s. ifHighSpeed is reported in units of 1,000,000 bit/s and is used
      # instead of ifSpeed because ifSpeed is a 32-bit gauge that saturates at 4,294,967,295 bit/s,
      # which would overstate utilization on 10G and faster links.
      - alert: SnmpInterfaceHighBandwidthUsageOutbound
        expr: 'rate(ifHCOutOctets[5m]) * 8 / (ifHighSpeed * 1000000) > 0.80 and ifHighSpeed > 0'
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: 'SNMP interface high bandwidth usage outbound (instance {{ $labels.instance }})'
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} outbound utilization is above 80%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # sysUpTime is in centiseconds (hundredths of a second).
      # NOTE: sysUpTime is a 32-bit TimeTicks value, so it also wraps to 0 after roughly 497 days;
      # it measures time since the SNMP agent was (re)initialized, which may differ from a full
      # device reboot - verify against your devices.
      - alert: SnmpDeviceRestarted
        expr: 'sysUpTime / 100 < 300'
        for: 0m
        labels:
          severity: info
        annotations:
          summary: 'SNMP device restarted (instance {{ $labels.instance }})'
          description: "SNMP device {{ $labels.instance }} has restarted (uptime < 5 minutes).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
Loading…
Reference in a new issue