From ed9ea6412a847f695d5678ee0b9e99061ffddd13 Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Sun, 4 Jul 2021 23:50:46 +0200 Subject: [PATCH] Update rules.yml --- _data/rules.yml | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index fb2408d..d6bd3b7 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -2004,34 +2004,26 @@ groups: - name: Loki exporters: - rules: - - name: LokiProcTooManyRestarts - query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2 - for: 0m - severity: warning - annotations: + - name: Loki process too many restarts description: A loki process had too many restarts (target {{ $labels.instance }}) - - name: CortexRulerConfigurationReloadFailure - query: cortex_ruler_config_last_reload_successful != 1 - for: 0m + query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2 severity: warning - annotations: + - name: Cortex + exporters: + - rules: + - name: Cortex ruler configuration reload failure description: Cortex ruler configuration reload failure (instance {{ $labels.instance }}) - - name: CortexNotConnectedToAlertmanager + query: cortex_ruler_config_last_reload_successful != 1 + severity: warning + - name: Cortex not connected to Alertmanager + description: Cortex not connected to Alertmanager (instance {{ $labels.instance }}) query: cortex_prometheus_notifications_alertmanagers_discovered < 1 - for: 0m - severity: severe - annotations: - description: Cortex not connected to alertmanager (instance {{ $labels.instance }}) - - name: CortexNotificationAreBeingDropped - query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0 - for: 0m - severity: severe - annotations: + severity: critical + - name: Cortex notification are being dropped description: Cortex notification are being dropped due to errors (instance {{ $labels.instance }}) - - name: CortexNotificationError - query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0 - for: 0m - severity: severe - annotations: + query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0 + severity: critical + - name: Cortex notification error description: Cortex is failing when sengin alert notifications (instance {{ $labels.instance }}) - \ No newline at end of file + query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0 + severity: critical