mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 00:47:18 +08:00
add argocd rules (#309)
* add argocd rules * fix(argocd): move contrib into _data/rules.yml instead of dist/... Co-authored-by: Samuel Berthe <dev@samuel-berthe.fr>
This commit is contained in:
parent
3ae036c975
commit
55b049eb28
2 changed files with 20 additions and 1 deletions
|
|
@ -59,6 +59,7 @@ Collection available here: **[https://awesome-prometheus-alerts.grep.to](https:/
|
|||
- [Etcd](https://awesome-prometheus-alerts.grep.to/rules#etcd)
|
||||
- [Linkerd](https://awesome-prometheus-alerts.grep.to/rules#linkerd)
|
||||
- [Istio](https://awesome-prometheus-alerts.grep.to/rules#istio)
|
||||
- [ArgoCD](https://awesome-prometheus-alerts.grep.to/rules#argocd)
|
||||
|
||||
#### Network, security and storage
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ groups:
|
|||
query: 'sum by (job) (up) == 0'
|
||||
severity: critical
|
||||
- name: Prometheus target missing with warmup time
|
||||
description: Allow a job time to start up (10 minutes) before alerting that it's down.
|
||||
description: Allow a job time to start up (10 minutes) before alerting that it's down.
|
||||
query: 'sum by (instance, job) ((up == 0) * on (instance) group_right(job) (node_time_seconds - node_boot_time_seconds > 600))'
|
||||
severity: critical
|
||||
- name: Prometheus configuration reload failure
|
||||
|
|
@ -1952,6 +1952,24 @@ groups:
|
|||
query: 'sum(rate(pilot_duplicate_envoy_clusters{}[5m])) > 0'
|
||||
severity: critical
|
||||
|
||||
- name: ArgoCD
|
||||
exporters:
|
||||
- name: Embedded exporter
|
||||
slug: embedded-exporter
|
||||
doc_url: https://argo-cd.readthedocs.io/en/stable/operator-manual/metrics/
|
||||
rules:
|
||||
- name: ArgoCD service not synced
|
||||
description: Service {{ $labels.name }} run by argo is currently not in sync.
|
||||
query: 'argocd_app_info{sync_status!="Synced"} != 0'
|
||||
severity: warning
|
||||
for: 15m
|
||||
- name: ArgoCD service unhealthy
|
||||
description: Service {{ $labels.name }} run by argo is currently not healthy.
|
||||
query: 'argocd_app_info{health_status!="Healthy"} != 0'
|
||||
severity: warning
|
||||
for: 15m
|
||||
|
||||
|
||||
- name: Network, security and storage
|
||||
services:
|
||||
- name: Ceph
|
||||
|
|
|
|||
Loading…
Reference in a new issue