mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-23 01:47:17 +08:00
117 lines
4.8 KiB
YAML
117 lines
4.8 KiB
YAML
groups:
|
|
|
|
- name: Oliver006RedisExporter
|
|
|
|
|
|
rules:
|
|
|
|
# Waits 1m before firing so that a quick restart does not trigger the alert.
# Condition: `redis_up` equals 0.
- alert: RedisDown
  expr: 'redis_up == 0'
  for: 1m
  labels:
    severity: critical
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis down (instance {{ $labels.instance }})'
    description: "Redis instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires immediately (for: 0m) when no `redis_instance_info` series carries
# role="master". `or vector(0)` supplies a 0 when the count returns nothing,
# so the `< 1` comparison still has a sample to evaluate.
# count() is used without a `by` clause and vector(0) has no labels, so the
# resulting sample carries no `instance` label. The summary therefore does
# not reference $labels.instance, which would always render as an empty string.
- alert: RedisMissingMaster
  expr: '(count(redis_instance_info{role="master"}) or vector(0)) < 1'
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: 'Redis missing master'
    description: "Redis cluster has no node marked as master.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# 1m delay allows a restart without triggering an alert.
# Fires when more than one `redis_instance_info` series carries role="master".
# count() is used without a `by` clause, so the resulting sample carries no
# `instance` label. The summary therefore does not reference
# $labels.instance, which would always render as an empty string.
- alert: RedisTooManyMasters
  expr: 'count(redis_instance_info{role="master"}) > 1'
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: 'Redis too many masters'
    description: "Redis cluster has too many nodes marked as master.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# For each group of `redis_connected_slaves` series that share every label
# except `instance` and `job`, compares the number of series with the sum of
# their values; fires immediately when (count - sum - 1) is positive.
# Both aggregations drop `instance` and `job`, so the summary does not
# reference $labels.instance, which would always render as an empty string.
- alert: RedisDisconnectedSlaves
  expr: 'count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 0'
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: 'Redis disconnected slaves'
    description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires immediately (for: 0m) when delta() of `redis_connected_slaves` over
# the last minute is negative, i.e. the value went down within that window.
- alert: RedisReplicationBroken
  expr: 'delta(redis_connected_slaves[1m]) < 0'
  for: 0m
  labels:
    severity: critical
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis replication broken (instance {{ $labels.instance }})'
    description: "Redis instance lost a slave\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires when `redis_connected_slaves` changed value more than once within a
# 1m window and that condition has held for 2m.
- alert: RedisClusterFlapping
  expr: 'changes(redis_connected_slaves[1m]) > 1'
  for: 2m
  labels:
    severity: critical
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis cluster flapping (instance {{ $labels.instance }})'
    description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires immediately (for: 0m) when `redis_rdb_last_save_timestamp_seconds`
# lies more than 48 hours (60 * 60 * 48 seconds) behind the evaluation time.
- alert: RedisMissingBackup
  expr: 'time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 48'
  for: 0m
  labels:
    severity: critical
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis missing backup (instance {{ $labels.instance }})'
    description: "Redis has not been backed up for 48 hours\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Prerequisite: start the exporter with the --include-system-metrics flag or
# with the REDIS_EXPORTER_INCL_SYSTEM_METRICS=true environment variable.
# Fires after 2m when `redis_memory_used_bytes` exceeds 90% of
# `redis_total_system_memory_bytes`.
- alert: RedisOutOfSystemMemory
  expr: 'redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90'
  for: 2m
  labels:
    severity: warning
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis out of system memory (instance {{ $labels.instance }})'
    description: "Redis is running out of system memory (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires after 2m when `redis_memory_used_bytes` exceeds 90% of
# `redis_memory_max_bytes`. The trailing `and on(instance) ... > 0` clause
# keeps only instances whose `redis_memory_max_bytes` is greater than zero,
# so a zero divisor cannot satisfy the threshold.
# NOTE(review): presumably a value of 0 means no maxmemory limit is set; confirm.
- alert: RedisOutOfConfiguredMaxmemory
  expr: 'redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 and on(instance) redis_memory_max_bytes > 0'
  for: 2m
  labels:
    severity: warning
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis out of configured maxmemory (instance {{ $labels.instance }})'
    description: "Redis is running out of configured maxmemory (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires after 2m when `redis_connected_clients` exceeds 90% of
# `redis_config_maxclients`.
- alert: RedisTooManyConnections
  expr: 'redis_connected_clients / redis_config_maxclients * 100 > 90'
  for: 2m
  labels:
    severity: warning
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis too many connections (instance {{ $labels.instance }})'
    description: "Redis is running out of connections (> 90% used)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires after 2m when `redis_connected_clients` stays below 5.
- alert: RedisNotEnoughConnections
  expr: 'redis_connected_clients < 5'
  for: 2m
  labels:
    severity: warning
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis not enough connections (instance {{ $labels.instance }})'
    description: "Redis instance should have more connections (> 5)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Fires immediately (for: 0m) when `redis_rejected_connections_total` grew by
# more than 5 over the last minute.
- alert: RedisRejectedConnections
  expr: 'increase(redis_rejected_connections_total[1m]) > 5'
  for: 0m
  labels:
    severity: warning
  annotations:
    # Quoted so the template braces are never read as YAML syntax.
    summary: 'Redis rejected connections (instance {{ $labels.instance }})'
    description: "Some connections to Redis has been rejected\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|