diff --git a/_data/rules.yml b/_data/rules.yml index 721177d..9209479 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -1399,23 +1399,6 @@ groups: - name: Traefik exporters: - - name: Embedded exporter - doc_url: https://docs.traefik.io/observability/metrics/prometheus/ - rules: - - name: Traefik backend down - description: All Traefik backends are down - query: 'count(traefik_backend_server_up) by (backend) == 0' - severity: critical - - name: Traefik high HTTP 4xx error rate backend - description: Traefik backend 4xx error rate is above 5% - query: 'sum(rate(traefik_backend_requests_total{code=~"4.*"}[3m])) by (backend) / sum(rate(traefik_backend_requests_total[3m])) by (backend) * 100 > 5' - severity: critical - for: 1m - - name: Traefik high HTTP 5xx error rate backend - description: Traefik backend 5xx error rate is above 5% - query: 'sum(rate(traefik_backend_requests_total{code=~"5.*"}[3m])) by (backend) / sum(rate(traefik_backend_requests_total[3m])) by (backend) * 100 > 5' - severity: critical - for: 1m - name: Embedded exporter v2 doc_url: https://docs.traefik.io/observability/metrics/prometheus/ rules: @@ -1433,6 +1416,23 @@ groups: query: 'sum(rate(traefik_service_requests_total{code=~"5.*"}[3m])) by (service) / sum(rate(traefik_service_requests_total[3m])) by (service) * 100 > 5' severity: critical for: 1m + - name: Embedded exporter v1 + doc_url: https://docs.traefik.io/observability/metrics/prometheus/ + rules: + - name: Traefik backend down + description: All Traefik backends are down + query: 'count(traefik_backend_server_up) by (backend) == 0' + severity: critical + - name: Traefik high HTTP 4xx error rate backend + description: Traefik backend 4xx error rate is above 5% + query: 'sum(rate(traefik_backend_requests_total{code=~"4.*"}[3m])) by (backend) / sum(rate(traefik_backend_requests_total[3m])) by (backend) * 100 > 5' + severity: critical + for: 1m + - name: Traefik high HTTP 5xx error rate backend + description: Traefik backend 5xx error rate is above 5% + query: 'sum(rate(traefik_backend_requests_total{code=~"5.*"}[3m])) by (backend) / sum(rate(traefik_backend_requests_total[3m])) by (backend) * 100 > 5' + severity: critical + for: 1m - name: Runtimes services: