mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-26 11:27:00 +08:00
Update rules.yml
This commit is contained in:
parent
58be245e66
commit
ed9ea6412a
1 changed file with 17 additions and 25 deletions
|
|
@@ -2004,34 +2004,26 @@ groups:
|
||||||
- name: Loki
|
- name: Loki
|
||||||
exporters:
|
exporters:
|
||||||
- rules:
|
- rules:
|
||||||
- name: LokiProcTooManyRestarts
|
- name: Loki process too many restarts
|
||||||
query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2
|
|
||||||
for: 0m
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
description: A loki process had too many restarts (target {{ $labels.instance }})
|
description: A loki process had too many restarts (target {{ $labels.instance }})
|
||||||
- name: CortexRulerConfigurationReloadFailure
|
query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2
|
||||||
query: cortex_ruler_config_last_reload_successful != 1
|
|
||||||
for: 0m
|
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
- name: Cortex
|
||||||
|
exporters:
|
||||||
|
- rules:
|
||||||
|
- name: Cortex ruler configuration reload failure
|
||||||
description: Cortex ruler configuration reload failure (instance {{ $labels.instance }})
|
description: Cortex ruler configuration reload failure (instance {{ $labels.instance }})
|
||||||
- name: CortexNotConnectedToAlertmanager
|
query: cortex_ruler_config_last_reload_successful != 1
|
||||||
|
severity: warning
|
||||||
|
- name: Cortex not connected to Alertmanager
|
||||||
|
description: Cortex not connected to Alertmanager (instance {{ $labels.instance }})
|
||||||
query: cortex_prometheus_notifications_alertmanagers_discovered < 1
|
query: cortex_prometheus_notifications_alertmanagers_discovered < 1
|
||||||
for: 0m
|
severity: critical
|
||||||
severity: severe
|
- name: Cortex notification are being dropped
|
||||||
annotations:
|
|
||||||
description: Cortex not connected to alertmanager (instance {{ $labels.instance }})
|
|
||||||
- name: CortexNotificationAreBeingDropped
|
|
||||||
query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0
|
|
||||||
for: 0m
|
|
||||||
severity: severe
|
|
||||||
annotations:
|
|
||||||
description: Cortex notification are being dropped due to errors (instance {{ $labels.instance }})
|
description: Cortex notification are being dropped due to errors (instance {{ $labels.instance }})
|
||||||
- name: CortexNotificationError
|
query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0
|
||||||
query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0
|
severity: critical
|
||||||
for: 0m
|
- name: Cortex notification error
|
||||||
severity: severe
|
|
||||||
annotations:
|
|
||||||
description: Cortex is failing when sengin alert notifications (instance {{ $labels.instance }})
|
description: Cortex is failing when sengin alert notifications (instance {{ $labels.instance }})
|
||||||
|
query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0
|
||||||
|
severity: critical
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue