mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-26 11:27:00 +08:00
fix: address PR review comments
- Cassandra connection timeouts: wrap rate() in sum by() (rate() by() is invalid PromQL)
- Elasticsearch query latency: add division-by-zero guard
- Redis backup: "backuped" → "backed up"
This commit is contained in:
parent
e25ddfdb83
commit
f82fbc98ba
1 changed file with 3 additions and 3 deletions
|
|
@@ -848,7 +848,7 @@ groups:
|
||||||
severity: critical
|
severity: critical
|
||||||
for: 2m
|
for: 2m
|
||||||
- name: Redis missing backup
|
- name: Redis missing backup
|
||||||
description: Redis has not been backuped for 48 hours
|
description: Redis has not been backed up for 48 hours
|
||||||
query: "time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 48"
|
query: "time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 48"
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: Redis out of system memory
|
- name: Redis out of system memory
|
||||||
|
|
@@ -1207,7 +1207,7 @@ groups:
|
||||||
for: 5m
|
for: 5m
|
||||||
- name: Elasticsearch High Query Latency
|
- name: Elasticsearch High Query Latency
|
||||||
description: "The query latency on Elasticsearch cluster is higher than the threshold."
|
description: "The query latency on Elasticsearch cluster is higher than the threshold."
|
||||||
query: "increase(elasticsearch_indices_search_query_time_seconds[1m]) / increase(elasticsearch_indices_search_query_total[1m]) > 1"
|
query: "increase(elasticsearch_indices_search_query_time_seconds[1m]) / increase(elasticsearch_indices_search_query_total[1m]) > 1 and increase(elasticsearch_indices_search_query_total[1m]) > 0"
|
||||||
severity: warning
|
severity: warning
|
||||||
for: 5m
|
for: 5m
|
||||||
|
|
||||||
|
|
@@ -1260,7 +1260,7 @@ groups:
|
||||||
severity: warning
|
severity: warning
|
||||||
- name: "Cassandra connection timeouts total"
|
- name: "Cassandra connection timeouts total"
|
||||||
description: "Some connection between nodes are ending in timeout - {{ $labels.cassandra_cluster }}"
|
description: "Some connection between nodes are ending in timeout - {{ $labels.cassandra_cluster }}"
|
||||||
query: "rate(cassandra_client_request_timeouts_total[5m]) by (cassandra_cluster,instance) > 5"
|
query: "sum by (cassandra_cluster,instance) (rate(cassandra_client_request_timeouts_total[5m])) > 5"
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: "Cassandra storage exceptions"
|
- name: "Cassandra storage exceptions"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue