This commit is contained in:
samber 2026-03-16 03:50:27 +00:00
parent fd3bfb02c0
commit 577c36d9ae
2 changed files with 8 additions and 8 deletions

View file

@@ -16,7 +16,7 @@ groups:
description: "Memcached instance is down on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: MemcachedConnectionLimitApproaching(>80%)
expr: '(memcached_current_connections / memcached_max_connections * 100) > 80'
expr: '(memcached_current_connections / memcached_max_connections * 100) > 80 and memcached_max_connections > 0'
for: 2m
labels:
severity: warning
@@ -25,7 +25,7 @@ groups:
description: "Memcached connection usage is above 80% on {{ $labels.instance }} (current value: {{ $value }}%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: MemcachedConnectionLimitApproaching(>95%)
expr: '(memcached_current_connections / memcached_max_connections * 100) > 95'
expr: '(memcached_current_connections / memcached_max_connections * 100) > 95 and memcached_max_connections > 0'
for: 2m
labels:
severity: critical
@@ -44,7 +44,7 @@ groups:
# High memory usage is expected if the cache is well-utilized. This alert fires when it approaches the configured limit, which may cause evictions.
- alert: MemcachedMemoryUsageHigh(>90%)
expr: '(memcached_current_bytes / memcached_limit_bytes * 100) > 90'
expr: '(memcached_current_bytes / memcached_limit_bytes * 100) > 90 and memcached_limit_bytes > 0'
for: 5m
labels:
severity: warning

View file

@@ -18,7 +18,7 @@ groups:
description: "SNMP device {{ $labels.instance }} is unreachable.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: SnmpInterfaceDown
expr: 'ifOperStatus{ifAdminStatus="1"} == 2'
expr: '(ifOperStatus{job=~"snmp.*"} == 2) and on(instance, job, ifIndex) (ifAdminStatus{job=~"snmp.*"} == 1)'
for: 2m
labels:
severity: critical
@@ -28,7 +28,7 @@ groups:
# Threshold is a rough default. Adjust based on your network environment.
- alert: SnmpInterfaceHighInboundErrorRate
expr: 'rate(ifInErrors[5m]) / (rate(ifHCInUcastPkts[5m]) + rate(ifHCInBroadcastPkts[5m]) + rate(ifHCInMulticastPkts[5m])) > 0.05 and (rate(ifHCInUcastPkts[5m]) + rate(ifHCInBroadcastPkts[5m]) + rate(ifHCInMulticastPkts[5m])) > 0'
expr: 'rate(ifInErrors{job=~"snmp.*"}[5m]) / (rate(ifHCInUcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCInBroadcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCInMulticastPkts{job=~"snmp.*"}[5m])) > 0.05 and (rate(ifHCInUcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCInBroadcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCInMulticastPkts{job=~"snmp.*"}[5m])) > 0'
for: 5m
labels:
severity: warning
@@ -38,7 +38,7 @@ groups:
# Threshold is a rough default. Adjust based on your network environment.
- alert: SnmpInterfaceHighOutboundErrorRate
expr: 'rate(ifOutErrors[5m]) / (rate(ifHCOutUcastPkts[5m]) + rate(ifHCOutBroadcastPkts[5m]) + rate(ifHCOutMulticastPkts[5m])) > 0.05 and (rate(ifHCOutUcastPkts[5m]) + rate(ifHCOutBroadcastPkts[5m]) + rate(ifHCOutMulticastPkts[5m])) > 0'
expr: 'rate(ifOutErrors{job=~"snmp.*"}[5m]) / (rate(ifHCOutUcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCOutBroadcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCOutMulticastPkts{job=~"snmp.*"}[5m])) > 0.05 and (rate(ifHCOutUcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCOutBroadcastPkts{job=~"snmp.*"}[5m]) + rate(ifHCOutMulticastPkts{job=~"snmp.*"}[5m])) > 0'
for: 5m
labels:
severity: warning
@@ -48,7 +48,7 @@ groups:
# Threshold is a rough default. Adjust based on your link capacity and traffic patterns.
- alert: SnmpInterfaceHighBandwidthUsageInbound
expr: 'rate(ifHCInOctets[5m]) * 8 / ifSpeed > 0.80 and ifSpeed > 0'
expr: 'rate(ifHCInOctets{job=~"snmp.*"}[5m]) * 8 / ifSpeed > 0.80 and ifSpeed > 0'
for: 15m
labels:
severity: warning
@@ -58,7 +58,7 @@ groups:
# Threshold is a rough default. Adjust based on your link capacity and traffic patterns.
- alert: SnmpInterfaceHighBandwidthUsageOutbound
expr: 'rate(ifHCOutOctets[5m]) * 8 / ifSpeed > 0.80 and ifSpeed > 0'
expr: 'rate(ifHCOutOctets{job=~"snmp.*"}[5m]) * 8 / ifSpeed > 0.80 and ifSpeed > 0'
for: 15m
labels:
severity: warning