mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-22 01:17:19 +08:00
Loki alerts (#218)
Co-authored-by: Samuel Berthe <dev@samuel-berthe.fr>
This commit is contained in:
parent
243c0280cf
commit
c2b8178304
2 changed files with 28 additions and 0 deletions
|
|
@ -72,6 +72,8 @@ Collection available here: **[https://awesome-prometheus-alerts.grep.to](https:/
|
|||
#### Other
|
||||
|
||||
- [Thanos](https://awesome-prometheus-alerts.grep.to/rules#thanos)
|
||||
- [Loki](https://awesome-prometheus-alerts.grep.to/rules#loki)
|
||||
- [Cortex](https://awesome-prometheus-alerts.grep.to/rules#cortex)
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
|
|
|
|||
|
|
@ -2001,3 +2001,29 @@ groups:
|
|||
description: Thanos compaction has not run in 24 hours.
|
||||
query: '(time() - thanos_objstore_bucket_last_successful_upload_time) > 24*60*60'
|
||||
severity: critical
|
||||
- name: Loki
|
||||
exporters:
|
||||
- rules:
|
||||
- name: Loki process too many restarts
|
||||
description: A Loki process had too many restarts (target {{ $labels.instance }})
|
||||
query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2
|
||||
severity: warning
|
||||
- name: Cortex
|
||||
exporters:
|
||||
- rules:
|
||||
- name: Cortex ruler configuration reload failure
|
||||
description: Cortex ruler configuration reload failure (instance {{ $labels.instance }})
|
||||
query: cortex_ruler_config_last_reload_successful != 1
|
||||
severity: warning
|
||||
- name: Cortex not connected to Alertmanager
|
||||
description: Cortex not connected to Alertmanager (instance {{ $labels.instance }})
|
||||
query: cortex_prometheus_notifications_alertmanagers_discovered < 1
|
||||
severity: critical
|
||||
- name: Cortex notifications are being dropped
|
||||
description: Cortex notifications are being dropped due to errors (instance {{ $labels.instance }})
|
||||
query: rate(cortex_prometheus_notifications_dropped_total[5m]) > 0
|
||||
severity: critical
|
||||
- name: Cortex notification error
|
||||
description: Cortex is failing when sending alert notifications (instance {{ $labels.instance }})
|
||||
query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0
|
||||
severity: critical
|
||||
|
|
|
|||
Loading…
Reference in a new issue