Fix queries in elasticsearch latency alerts (#455)

The `elasticsearch_indices_search_fetch_total`,
`elasticsearch_indices_search_fetch_time_seconds`,
`elasticsearch_indices_indexing_index_time_seconds_total`
and `elasticsearch_indices_indexing_index_total` metrics
are counters.

Dividing the raw counters gives the average over the counters' entire
lifetime (since their last reset), so a spike in the numerator would
cause the alert to persist even if subsequent fetch/index operations
are normal. Wrapping both counters in `increase(...[1m])` changes each
query to check whether operations took, on average, more than X over
a 1-minute interval, which was likely the original intent of
these alerts.
This commit is contained in:
Pigueiras 2025-03-26 22:15:24 +01:00 committed by GitHub
parent 242054f7dc
commit 97a31f34e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 4 additions and 4 deletions

View file

@ -1147,7 +1147,7 @@ groups:
severity: warning
- name: Elasticsearch High Indexing Latency
description: "The indexing latency on Elasticsearch cluster is higher than the threshold."
query: "elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005"
query: "increase(elasticsearch_indices_indexing_index_time_seconds_total[1m]) / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005"
severity: warning
for: 10m
- name: Elasticsearch High Indexing Rate
@ -1162,7 +1162,7 @@ groups:
for: 5m
- name: Elasticsearch High Query Latency
description: "The query latency on Elasticsearch cluster is higher than the threshold."
query: "elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1"
query: "increase(elasticsearch_indices_search_fetch_time_seconds[1m]) / increase(elasticsearch_indices_search_fetch_total[1m]) > 1"
severity: warning
for: 5m

View file

@ -140,7 +140,7 @@ groups:
description: "No new documents for 10 min!\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: ElasticsearchHighIndexingLatency
expr: 'elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005'
expr: 'increase(elasticsearch_indices_indexing_index_time_seconds_total[1m]) / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005'
for: 10m
labels:
severity: warning
@ -167,7 +167,7 @@ groups:
description: "The query rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: ElasticsearchHighQueryLatency
expr: 'elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1'
expr: 'increase(elasticsearch_indices_search_fetch_time_seconds[1m]) / increase(elasticsearch_indices_search_fetch_total[1m]) > 1'
for: 5m
labels:
severity: warning