Add caddy.yml (#450)

This commit is contained in:
Felix Bühler 2025-02-04 14:23:14 +01:00 committed by GitHub
parent 70ac7d9cae
commit 10d00c66da
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 55 additions and 1 deletions

View file

@ -67,6 +67,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
- [Apache](https://samber.github.io/awesome-prometheus-alerts/rules#apache)
- [HaProxy](https://samber.github.io/awesome-prometheus-alerts/rules#haproxy)
- [Traefik](https://samber.github.io/awesome-prometheus-alerts/rules#traefik)
- [Caddy](https://samber.github.io/awesome-prometheus-alerts/rules#caddy)
#### Runtimes

View file

@ -1950,6 +1950,27 @@ groups:
severity: critical
for: 1m
# Caddy service entry: alert definitions based on the metrics Caddy exposes
# itself (see doc_url below).
- name: Caddy
  exporters:
    - name: Embedded exporter
      doc_url: https://caddyserver.com/docs/metrics
      rules:
        # caddy_reverse_proxy_upstreams_healthy is a per-upstream health gauge
        # (per the Caddy metrics docs: 1 = healthy, 0 = unhealthy).
        # The values must be aggregated with sum(): count() returns the number
        # of series in each group, which is >= 1 whenever the group exists, so
        # "count(...) == 0" can never match and the alert would never fire.
        # NOTE(review): grouping by (upstream) drops the instance label.
        - name: Caddy Reverse Proxy Down
          description: "All Caddy reverse proxies are down"
          query: "sum(caddy_reverse_proxy_upstreams_healthy) by (upstream) == 0"
          severity: critical
          for: 0m
        # Share of responses with a 4xx status code, per instance, computed
        # over a 3-minute rate window and expressed as a percentage.
        - name: Caddy high HTTP 4xx error rate service
          description: "Caddy service 4xx error rate is above 5%"
          query: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"4.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
          severity: critical
          for: 1m
        # Same ratio as above, restricted to 5xx status codes.
        - name: Caddy high HTTP 5xx error rate service
          description: "Caddy service 5xx error rate is above 5%"
          query: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"5.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
          severity: critical
          for: 1m
- name: Runtimes
services:
- name: PHP-FPM
@ -2975,7 +2996,7 @@ groups:
- slug: embedded-exporter
rules:
- name: Grafana Alloy service down
description: Alloy on (instance {{ $labels.instance }}) is not responding or has stopped running.
description: Alloy on (instance {{ $labels.instance }}) is not responding or has stopped running.
query: 'count by (instance) (alloy_build_info) unless count by (instance) (alloy_build_info offset 2m) '
severity: critical

32
dist/rules/caddy/embedded-exporter.yml vendored Normal file
View file

@ -0,0 +1,32 @@
# Prometheus alerting rules for Caddy, based on the metrics Caddy exposes itself.
# NOTE(review): this file sits under dist/ and looks generated — confirm, and
# keep the rule definitions it is built from in sync with any change made here.
groups:
  - name: EmbeddedExporter
    rules:
      # caddy_reverse_proxy_upstreams_healthy is a per-upstream health gauge
      # (per the Caddy metrics docs: 1 = healthy, 0 = unhealthy).
      # The values must be aggregated with sum(): count() returns the number
      # of series in each group, which is >= 1 whenever the group exists, so
      # "count(...) == 0" can never match and the alert would never fire.
      # NOTE(review): grouping by (upstream) drops the instance label, so
      # {{ $labels.instance }} in the summary renders empty for this alert.
      - alert: CaddyReverseProxyDown
        expr: 'sum(caddy_reverse_proxy_upstreams_healthy) by (upstream) == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Caddy reverse proxy down (instance {{ $labels.instance }})
          description: "All Caddy reverse proxies are down\n LABELS = {{ $labels }}"
      # Share of responses with a 4xx status code, per instance, computed
      # over a 3-minute rate window and expressed as a percentage.
      - alert: CaddyHighHttp4xxErrorRateService
        expr: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"4.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: Caddy high HTTP 4xx error rate service (instance {{ $labels.instance }})
          description: "Caddy service 4xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Same ratio as above, restricted to 5xx status codes.
      - alert: CaddyHighHttp5xxErrorRateService
        expr: 'sum(rate(caddy_http_request_duration_seconds_count{code=~"5.."}[3m])) by (instance) / sum(rate(caddy_http_request_duration_seconds_count[3m])) by (instance) * 100 > 5'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: Caddy high HTTP 5xx error rate service (instance {{ $labels.instance }})
          description: "Caddy service 5xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"