From 97a31f34e5c94af555ac442177a5224175bdb3d6 Mon Sep 17 00:00:00 2001 From: Pigueiras Date: Wed, 26 Mar 2025 22:15:24 +0100 Subject: [PATCH] Fix queries in elasticsearch latency alerts (#455) The `elasticsearch_indices_search_fetch_total`, `elasticsearch_indices_search_fetch_time_seconds`, `elasticsearch_indices_indexing_index_time_seconds_total` and `elasticsearch_indices_indexing_index_total` metrics are counters. Dividing these metrics doesn't make sense because a spike in numerator would cause the alert to persist, even if subsequent fetch/index operations are normal. Adding `increase` changes the query to check if operations took, on average, more than X over a 1-minute interval, which was likely the original intent of this alert. --- _data/rules.yml | 4 ++-- .../prometheus-community-elasticsearch-exporter.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index 04b300a..8fa80f3 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -1147,7 +1147,7 @@ groups: severity: warning - name: Elasticsearch High Indexing Latency description: "The indexing latency on Elasticsearch cluster is higher than the threshold." - query: "elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005" + query: "increase(elasticsearch_indices_indexing_index_time_seconds_total[1m]) / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005" severity: warning for: 10m - name: Elasticsearch High Indexing Rate @@ -1162,7 +1162,7 @@ groups: for: 5m - name: Elasticsearch High Query Latency description: "The query latency on Elasticsearch cluster is higher than the threshold." - query: "elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1" + query: "increase(elasticsearch_indices_search_fetch_time_seconds[1m]) / increase(elasticsearch_indices_search_fetch_total[1m]) > 1" severity: warning for: 5m diff --git a/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml b/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml index 5e6bb9d..12705b5 100644 --- a/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml +++ b/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml @@ -140,7 +140,7 @@ groups: description: "No new documents for 10 min!\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: ElasticsearchHighIndexingLatency - expr: 'elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005' + expr: 'increase(elasticsearch_indices_indexing_index_time_seconds_total[1m]) / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005' for: 10m labels: severity: warning @@ -167,7 +167,7 @@ groups: description: "The query rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: ElasticsearchHighQueryLatency - expr: 'elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1' + expr: 'increase(elasticsearch_indices_search_fetch_time_seconds[1m]) / increase(elasticsearch_indices_search_fetch_total[1m]) > 1' for: 5m labels: severity: warning