fix: address PR review comments

- Cassandra connection timeouts: wrap rate() in sum by() (rate() by() is invalid PromQL)
- Elasticsearch query latency: add division-by-zero guard
- Redis backup: "backuped" → "backed up"
This commit is contained in:
Samuel Berthe 2026-03-16 01:21:09 +01:00
parent e25ddfdb83
commit f82fbc98ba

View file

@@ -848,7 +848,7 @@ groups:
severity: critical
for: 2m
- name: Redis missing backup
description: Redis has not been backuped for 48 hours
description: Redis has not been backed up for 48 hours
query: "time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 48"
severity: critical
- name: Redis out of system memory
@@ -1207,7 +1207,7 @@ groups:
for: 5m
- name: Elasticsearch High Query Latency
description: "The query latency on Elasticsearch cluster is higher than the threshold."
query: "increase(elasticsearch_indices_search_query_time_seconds[1m]) / increase(elasticsearch_indices_search_query_total[1m]) > 1"
query: "increase(elasticsearch_indices_search_query_time_seconds[1m]) / increase(elasticsearch_indices_search_query_total[1m]) > 1 and increase(elasticsearch_indices_search_query_total[1m]) > 0"
severity: warning
for: 5m
@@ -1260,7 +1260,7 @@ groups:
severity: warning
- name: "Cassandra connection timeouts total"
description: "Some connection between nodes are ending in timeout - {{ $labels.cassandra_cluster }}"
query: "rate(cassandra_client_request_timeouts_total[5m]) by (cassandra_cluster,instance) > 5"
query: "sum by (cassandra_cluster,instance) (rate(cassandra_client_request_timeouts_total[5m])) > 5"
for: 2m
severity: critical
- name: "Cassandra storage exceptions"