Update rules.yml

This commit is contained in:
Samuel Berthe 2021-07-04 23:50:46 +02:00 committed by GitHub
parent 58be245e66
commit ed9ea6412a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -2004,34 +2004,26 @@ groups:
- name: Loki - name: Loki
exporters: exporters:
- rules: - rules:
- name: LokiProcTooManyRestarts - name: Loki process too many restarts
query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2
for: 0m
severity: warning
annotations:
description: A loki process had too many restarts (target {{ $labels.instance }}) description: A loki process had too many restarts (target {{ $labels.instance }})
- name: CortexRulerConfigurationReloadFailure query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2
query: cortex_ruler_config_last_reload_successful != 1
for: 0m
severity: warning severity: warning
annotations: - name: Cortex
exporters:
- rules:
- name: Cortex ruler configuration reload failure
description: Cortex ruler configuration reload failure (instance {{ $labels.instance }}) description: Cortex ruler configuration reload failure (instance {{ $labels.instance }})
- name: CortexNotConnectedToAlertmanager query: cortex_ruler_config_last_reload_successful != 1
severity: warning
- name: Cortex not connected to Alertmanager
description: Cortex not connected to Alertmanager (instance {{ $labels.instance }})
query: cortex_prometheus_notifications_alertmanagers_discovered < 1 query: cortex_prometheus_notifications_alertmanagers_discovered < 1
for: 0m severity: critical
severity: severe - name: Cortex notification are being dropped
annotations:
description: Cortex not connected to alertmanager (instance {{ $labels.instance }})
- name: CortexNotificationAreBeingDropped
query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0
for: 0m
severity: severe
annotations:
description: Cortex notification are being dropped due to errors (instance {{ $labels.instance }}) description: Cortex notification are being dropped due to errors (instance {{ $labels.instance }})
- name: CortexNotificationError query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0
query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0 severity: critical
for: 0m - name: Cortex notification error
severity: severe
annotations:
description: Cortex is failing when sengin alert notifications (instance {{ $labels.instance }}) description: Cortex is failing when sengin alert notifications (instance {{ $labels.instance }})
query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0
severity: critical