mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-24 18:36:59 +08:00
Fix queries in elasticsearch latency alerts (#455)
The `elasticsearch_indices_search_fetch_total`, `elasticsearch_indices_search_fetch_time_seconds`, `elasticsearch_indices_indexing_index_time_seconds_total` and `elasticsearch_indices_indexing_index_total` metrics are counters. Dividing their raw values doesn't make sense: the ratio is an average over everything since the counters last reset, so a spike in the numerator would cause the alert to persist even if subsequent fetch/index operations are normal. Wrapping each counter in `increase(...[1m])` changes the query to check whether operations took, on average, more than X over a 1-minute interval, which was likely the original intent of these alerts.
This commit is contained in:
parent
242054f7dc
commit
97a31f34e5
2 changed files with 4 additions and 4 deletions
|
|
@ -1147,7 +1147,7 @@ groups:
|
||||||
severity: warning
|
severity: warning
|
||||||
- name: Elasticsearch High Indexing Latency
|
- name: Elasticsearch High Indexing Latency
|
||||||
description: "The indexing latency on Elasticsearch cluster is higher than the threshold."
|
description: "The indexing latency on Elasticsearch cluster is higher than the threshold."
|
||||||
query: "elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005"
|
query: "increase(elasticsearch_indices_indexing_index_time_seconds_total[1m]) / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005"
|
||||||
severity: warning
|
severity: warning
|
||||||
for: 10m
|
for: 10m
|
||||||
- name: Elasticsearch High Indexing Rate
|
- name: Elasticsearch High Indexing Rate
|
||||||
|
|
@ -1162,7 +1162,7 @@ groups:
|
||||||
for: 5m
|
for: 5m
|
||||||
- name: Elasticsearch High Query Latency
|
- name: Elasticsearch High Query Latency
|
||||||
description: "The query latency on Elasticsearch cluster is higher than the threshold."
|
description: "The query latency on Elasticsearch cluster is higher than the threshold."
|
||||||
query: "elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1"
|
query: "increase(elasticsearch_indices_search_fetch_time_seconds[1m]) / increase(elasticsearch_indices_search_fetch_total[1m]) > 1"
|
||||||
severity: warning
|
severity: warning
|
||||||
for: 5m
|
for: 5m
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -140,7 +140,7 @@ groups:
|
||||||
description: "No new documents for 10 min!\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "No new documents for 10 min!\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: ElasticsearchHighIndexingLatency
|
- alert: ElasticsearchHighIndexingLatency
|
||||||
expr: 'elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005'
|
expr: 'increase(elasticsearch_indices_indexing_index_time_seconds_total[1m]) / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005'
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
|
@ -167,7 +167,7 @@ groups:
|
||||||
description: "The query rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "The query rate on Elasticsearch cluster is higher than the threshold.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
- alert: ElasticsearchHighQueryLatency
|
- alert: ElasticsearchHighQueryLatency
|
||||||
expr: 'elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1'
|
expr: 'increase(elasticsearch_indices_search_fetch_time_seconds[1m]) / increase(elasticsearch_indices_search_fetch_total[1m]) > 1'
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue