---
# Prometheus alerting rules for SNMP-monitored devices (group SnmpExporter).
#
# These rules use standard IF-MIB and SNMPv2-MIB metrics. Metric names depend
# on your snmp.yml module configuration.
# Thresholds for bandwidth and error rates are rough defaults - adjust to your
# environment.
groups:
  - name: SnmpExporter
    rules:
      # From the official snmp-mixin.
      # Fires when the scrape of any job whose name starts with "snmp" fails.
      - alert: SnmpTargetDown
        expr: 'up{job=~"snmp.*"} == 0'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: SNMP target down (instance {{ $labels.instance }})
          description: "SNMP device {{ $labels.instance }} is unreachable.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # IF-MIB status codes: ifOperStatus 2 = down, ifAdminStatus 1 = up.
      # ifAdminStatus is exported as its own metric by the standard IF-MIB
      # module, not as a label on ifOperStatus, so the two series are joined
      # with `and` (matching on their shared labels) instead of using a label
      # matcher that would never select anything.
      # NOTE(review): if your snmp.yml uses a lookup that turns ifAdminStatus
      # into a label, use 'ifOperStatus{ifAdminStatus="1"} == 2' instead.
      - alert: SnmpInterfaceDown
        expr: 'ifOperStatus == 2 and ifAdminStatus == 1'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: SNMP interface down (instance {{ $labels.instance }})
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} is operationally down while administratively up.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your network environment.
      # Ratio of inbound errors to received packets (unicast + broadcast +
      # multicast). The trailing `and ... > 0` drops interfaces with no
      # traffic, where the division would otherwise yield +Inf.
      - alert: SnmpInterfaceHighInboundErrorRate
        expr: |-
          rate(ifInErrors[5m])
            / (
              rate(ifHCInUcastPkts[5m])
              + rate(ifHCInBroadcastPkts[5m])
              + rate(ifHCInMulticastPkts[5m])
            )
          > 0.05
          and
          (
            rate(ifHCInUcastPkts[5m])
            + rate(ifHCInBroadcastPkts[5m])
            + rate(ifHCInMulticastPkts[5m])
          ) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: SNMP interface high inbound error rate (instance {{ $labels.instance }})
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} has an inbound error rate above 5%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your network environment.
      # Outbound counterpart of the rule above, with the same zero-traffic
      # guard.
      - alert: SnmpInterfaceHighOutboundErrorRate
        expr: |-
          rate(ifOutErrors[5m])
            / (
              rate(ifHCOutUcastPkts[5m])
              + rate(ifHCOutBroadcastPkts[5m])
              + rate(ifHCOutMulticastPkts[5m])
            )
          > 0.05
          and
          (
            rate(ifHCOutUcastPkts[5m])
            + rate(ifHCOutBroadcastPkts[5m])
            + rate(ifHCOutMulticastPkts[5m])
          ) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: SNMP interface high outbound error rate (instance {{ $labels.instance }})
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} has an outbound error rate above 5%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your link capacity and
      # traffic patterns.
      # Utilization = bits per second / link speed in bits per second.
      # ifSpeed is a 32-bit gauge that saturates at 4294967295 bit/s, so on
      # links faster than ~4.29 Gbit/s it overstates utilization. Prefer
      # ifHighSpeed (reported in units of 1,000,000 bit/s) and fall back to
      # ifSpeed only when ifHighSpeed is missing or zero. Interfaces with no
      # positive speed from either metric are dropped by the division.
      - alert: SnmpInterfaceHighBandwidthUsageInbound
        expr: |-
          rate(ifHCInOctets[5m]) * 8
            / ((ifHighSpeed * 1000000 > 0) or (ifSpeed > 0))
          > 0.80
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: SNMP interface high bandwidth usage inbound (instance {{ $labels.instance }})
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} inbound utilization is above 80%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is a rough default. Adjust based on your link capacity and
      # traffic patterns.
      # Outbound counterpart of the rule above; same ifHighSpeed-first speed
      # selection, because ifSpeed saturates at 4294967295 bit/s.
      - alert: SnmpInterfaceHighBandwidthUsageOutbound
        expr: |-
          rate(ifHCOutOctets[5m]) * 8
            / ((ifHighSpeed * 1000000 > 0) or (ifSpeed > 0))
          > 0.80
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: SNMP interface high bandwidth usage outbound (instance {{ $labels.instance }})
          description: "Interface {{ $labels.ifDescr }} on {{ $labels.instance }} outbound utilization is above 80%.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # sysUpTime is in centiseconds (hundredths of a second), so dividing by
      # 100 gives seconds; the alert fires while uptime is under 300 seconds.
      # NOTE(review): sysUpTime is a 32-bit TimeTicks value and wraps to zero
      # after roughly 497 days, which this rule cannot tell apart from a
      # restart.
      - alert: SnmpDeviceRestarted
        expr: 'sysUpTime / 100 < 300'
        for: 0m
        labels:
          severity: info
        annotations:
          summary: SNMP device restarted (instance {{ $labels.instance }})
          description: "SNMP device {{ $labels.instance }} has restarted (uptime < 5 minutes).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"