From 10d00c66da78c3c47ff3f76cf9903ad821b5fbbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=BChler?=
Date: Tue, 4 Feb 2025 14:23:14 +0100
Subject: [PATCH] Add caddy.yml (#450)

---
Notes (ignored by git am):

* "Caddy Reverse Proxy Down" aggregates with sum() instead of count().
  count() yields the number of series in each group, so any group that
  exists evaluates to at least 1 and an empty group yields no sample at
  all; "count(...) == 0" therefore can never match and the alert would
  never fire. sum() adds the gauge values, so the result is 0 only when
  every series for that upstream reports 0 (presumably 0 = unhealthy,
  1 = healthy; confirm against the Caddy metrics documentation).
* The expression groups by (upstream), which drops the instance label, so
  "{{ $labels.instance }}" in the generated summary renders empty for this
  alert. Left untouched here because the summary comes from the shared
  template; worth a follow-up.

 README.md                              |  1 +
 _data/rules.yml                        | 23 +++++++++++++++++-
 dist/rules/caddy/embedded-exporter.yml | 32 ++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 dist/rules/caddy/embedded-exporter.yml

diff --git a/README.md b/README.md
index fdb5643..7011402 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
 - [Apache](https://samber.github.io/awesome-prometheus-alerts/rules#apache)
 - [HaProxy](https://samber.github.io/awesome-prometheus-alerts/rules#haproxy)
 - [Traefik](https://samber.github.io/awesome-prometheus-alerts/rules#traefik)
+- [Caddy](https://samber.github.io/awesome-prometheus-alerts/rules#caddy)
 
 #### Runtimes
 
diff --git a/_data/rules.yml b/_data/rules.yml
index 6ad5f9d..00aa84a 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1950,6 +1950,27 @@ groups:
                 severity: critical
                 for: 1m
 
+      - name: Caddy
+        exporters:
+          - name: Embedded exporter
+            doc_url: https://caddyserver.com/docs/metrics
+            rules:
+              - name: Caddy Reverse Proxy Down
+                description: "All Caddy reverse proxies are down"
+                query: "sum(caddy_reverse_proxy_upstreams_healthy) by (upstream) == 0"
+                severity: critical
+                for: 0m
+              - name: Caddy high HTTP 4xx error rate service
+                description: "Caddy service 4xx error rate is above 5%"
+                query: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"4.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
+                severity: critical
+                for: 1m
+              - name: Caddy high HTTP 5xx error rate service
+                description: "Caddy service 5xx error rate is above 5%"
+                query: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"5.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
+                severity: critical
+                for: 1m
+
   - name: Runtimes
     services:
       - name: PHP-FPM
@@ -2975,7 +2996,7 @@ groups:
           - slug: embedded-exporter
             rules:
               - name: Grafana Alloy service down
-                description: Alloy on (instance {{ $labels.instance }}) is not responding or has stopped running. 
+                description: Alloy on (instance {{ $labels.instance }}) is not responding or has stopped running.
                 query: 'count by (instance) (alloy_build_info) unless count by (instance) (alloy_build_info offset 2m)
                   '
                 severity: critical
diff --git a/dist/rules/caddy/embedded-exporter.yml b/dist/rules/caddy/embedded-exporter.yml
new file mode 100644
index 0000000..4f23002
--- /dev/null
+++ b/dist/rules/caddy/embedded-exporter.yml
@@ -0,0 +1,32 @@
+groups:
+
+- name: EmbeddedExporter
+
+  rules:
+
+    - alert: CaddyReverseProxyDown
+      expr: 'sum(caddy_reverse_proxy_upstreams_healthy) by (upstream) == 0'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Caddy reverse proxy down (instance {{ $labels.instance }})
+        description: "All Caddy reverse proxies are down\n LABELS = {{ $labels }}"
+
+    - alert: CaddyHighHttp4xxErrorRateService
+      expr: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"4.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
+      for: 1m
+      labels:
+        severity: critical
+      annotations:
+        summary: Caddy high HTTP 4xx error rate service (instance {{ $labels.instance }})
+        description: "Caddy service 4xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    - alert: CaddyHighHttp5xxErrorRateService
+      expr: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"5.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
+      for: 1m
+      labels:
+        severity: critical
+      annotations:
+        summary: Caddy high HTTP 5xx error rate service (instance {{ $labels.instance }})
+        description: "Caddy service 5xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"