From 2bfb8d8ddc3afd9f933397ef3b68db358df1dad6 Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Sun, 2 Oct 2022 18:04:11 +0200 Subject: [PATCH] fix(argocd): move contrib into _data/rules.yml instead of dist/... --- _data/rules.yml | 20 +++++++++++++++++++- dist/rules/argocd/argocd.yml | 25 ------------------------- 2 files changed, 19 insertions(+), 26 deletions(-) delete mode 100644 dist/rules/argocd/argocd.yml diff --git a/_data/rules.yml b/_data/rules.yml index c72ef75..286a567 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -27,7 +27,7 @@ groups: query: 'sum by (job) (up) == 0' severity: critical - name: Prometheus target missing with warmup time - description: Allow a job time to start up (10 minutes) before alerting that it's down. + description: Allow a job time to start up (10 minutes) before alerting that it's down. query: 'sum by (instance, job) ((up == 0) * on (instance) group_right(job) (node_time_seconds - node_boot_time_seconds > 600))' severity: critical - name: Prometheus configuration reload failure @@ -1952,6 +1952,24 @@ groups: query: 'sum(rate(pilot_duplicate_envoy_clusters{}[5m])) > 0' severity: critical + - name: ArgoCD + exporters: + - name: Embedded exporter + slug: embedded-exporter + doc_url: https://argo-cd.readthedocs.io/en/stable/operator-manual/metrics/ + rules: + - name: ArgoCD service not synced + description: Service {{ $labels.name }} run by argo is currently not in sync. + query: 'argocd_app_info{sync_status!="Synced"} != 0' + severity: warning + for: 15m + - name: ArgoCD service unhealthy + description: Service {{ $labels.name }} run by argo is currently not healthy. + query: 'argocd_app_info{health_status!="Healthy"} != 0' + severity: warning + for: 15m + + - name: Network, security and storage services: - name: Ceph diff --git a/dist/rules/argocd/argocd.yml b/dist/rules/argocd/argocd.yml deleted file mode 100644 index 8c144e2..0000000 --- a/dist/rules/argocd/argocd.yml +++ /dev/null @@ -1,25 +0,0 @@ -groups: - -- name: ArgoCD - - rules: - - - alert: ServiceUnSynced - expr: argocd_app_info{sync_status!="Synced"} != 0 - for: 20m - labels: - severity: warning - annotations: - summary: ArgoCD Service (name {{ $labels.name }}) unsynced. - description: "Service {{ $labels.name }} run by argo is currently not in sync please check" - - - - - alert: ServiceUnhealthy - expr: argocd_app_info{health_status!="Healthy"} != 0 - for: 5m - labels: - severity: critical - annotations: - summary: ArgoCD Service (name {{ $labels.name }}) unhealthy. - description: "Service {{ $labels.name }} run by argo is currently not in healthy please check"