# awesome-prometheus-alerts/dist/rules/haproxy/haproxy-exporter-v1.yml
# 2026-04-06 18:38:45 +00:00
#
# 150 lines
# 7.4 KiB
# YAML

# Prometheus alerting rules for HAProxy metrics (group "HaproxyExporterV1").
#
# Layout: groups -> group -> rules -> rule -> labels / annotations. Each rule's
# `labels` and `annotations` must be nested under that rule; otherwise the keys
# collapse into duplicated top-level keys and the file is not a valid rule file.
#
# Long expressions are written as folded scalars (`>-`): the lines are joined
# with single spaces, giving the same one-line PromQL string.
#
# NOTE(review): rules that aggregate with `sum by (...)` / `avg by (...)` keep
# only the label named in the `by` clause, so `{{ $labels.instance }}` in their
# summaries has no value to show. Confirm whether `instance` should be added to
# those `by` clauses; doing so changes how alerts are grouped, so it is not
# changed here.
groups:
  - name: HaproxyExporterV1
    rules:
      # Fires as soon as haproxy_up reports 0 (no hold time).
      - alert: HaproxyDown
        expr: 'haproxy_up == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: HAProxy down (instance {{ $labels.instance }})
          description: "HAProxy down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: 4xx responses as a percentage of all responses (1m rate) above 5.
      # The trailing `and ... > 0` clause requires a non-zero response rate.
      - alert: HaproxyHighHttp4xxErrorRateBackend(v1)
        expr: >-
          sum by (backend) (rate(haproxy_server_http_responses_total{code="4xx"}[1m]))
          / sum by (backend) (rate(haproxy_server_http_responses_total[1m])) * 100 > 5
          and sum by (backend) (rate(haproxy_server_http_responses_total[1m])) > 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 4xx error rate backend (v1) (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: 5xx responses as a percentage of all responses (1m rate) above 5.
      - alert: HaproxyHighHttp5xxErrorRateBackend(v1)
        expr: >-
          sum by (backend) (rate(haproxy_server_http_responses_total{code="5xx"}[1m]))
          / sum by (backend) (rate(haproxy_server_http_responses_total[1m])) * 100 > 5
          and sum by (backend) (rate(haproxy_server_http_responses_total[1m])) > 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 5xx error rate backend (v1) (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per server: 4xx responses as a percentage of all responses (1m rate) above 5.
      - alert: HaproxyHighHttp4xxErrorRateServer(v1)
        expr: >-
          sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m]) * 100)
          / sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 5
          and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 4xx error rate server (v1) (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per server: 5xx responses as a percentage of all responses (1m rate) above 5.
      - alert: HaproxyHighHttp5xxErrorRateServer(v1)
        expr: >-
          sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m]) * 100)
          / sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 5
          and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 5xx error rate server (v1) (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per server: response errors as a percentage of HTTP responses (1m rate) above 5.
      - alert: HaproxyServerResponseErrors(v1)
        expr: >-
          sum by (server) (rate(haproxy_server_response_errors_total[1m]) * 100)
          / sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 5
          and sum by (server) (rate(haproxy_server_http_responses_total[1m])) > 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy server response errors (v1) (instance {{ $labels.instance }})
          description: "Too many response errors to {{ $labels.server }} server (> 5%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: connection-error rate (1m window) above 100, held for 1m.
      - alert: HaproxyBackendConnectionErrors(v1)
        expr: 'sum by (backend) (rate(haproxy_backend_connection_errors_total[1m])) > 100'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy backend connection errors (v1) (instance {{ $labels.instance }})
          description: "Too many connection errors to {{ $labels.backend }} backend (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per server: connection-error rate (1m window) above 100; no hold time,
      # unlike the backend-level rule above, which waits 1m.
      - alert: HaproxyServerConnectionErrors(v1)
        expr: 'sum by (server) (rate(haproxy_server_connection_errors_total[1m])) > 100'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: HAProxy server connection errors (v1) (instance {{ $labels.instance }})
          description: "Too many connection errors to {{ $labels.server }} server (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: current sessions as a percentage of the session limit above 80
      # for 2m. The `and ... > 0` clause skips backends whose summed limit is 0.
      - alert: HaproxyBackendMaxActiveSession
        expr: >-
          ((sum by (backend) (haproxy_backend_current_sessions * 100)
          / sum by (backend) (haproxy_backend_limit_sessions))) > 80
          and sum by (backend) (haproxy_backend_limit_sessions) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: HAProxy backend max active session (instance {{ $labels.instance }})
          description: "HAProxy backend {{ $labels.backend }} is reaching session limit (> 80%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: summed current queue above 0 for 2m.
      - alert: HaproxyPendingRequests(v1)
        expr: 'sum by (backend) (haproxy_backend_current_queue) > 0'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: HAProxy pending requests (v1) (instance {{ $labels.instance }})
          description: "Some HAProxy requests are pending on {{ $labels.backend }} backend\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: mean of haproxy_backend_http_total_time_average_seconds above 1 for 1m.
      - alert: HaproxyHttpSlowingDown(v1)
        expr: 'avg by (backend) (haproxy_backend_http_total_time_average_seconds) > 1'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: HAProxy HTTP slowing down (v1) (instance {{ $labels.instance }})
          description: "Average request time is increasing\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per backend: retry-warning rate (1m window) above 10 for 2m.
      - alert: HaproxyRetryHigh(v1)
        expr: 'sum by (backend) (rate(haproxy_backend_retry_warnings_total[1m])) > 10'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: HAProxy retry high (v1) (instance {{ $labels.instance }})
          description: "High rate of retry on {{ $labels.backend }} backend\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires as soon as haproxy_backend_up reports 0 for any series.
      - alert: HaproxyBackendDown
        expr: 'haproxy_backend_up == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: HAProxy backend down (instance {{ $labels.instance }})
          description: "HAProxy backend is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires as soon as haproxy_server_up reports 0 for any series.
      - alert: HaproxyServerDown
        expr: 'haproxy_server_up == 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: HAProxy server down (instance {{ $labels.instance }})
          description: "HAProxy server is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per frontend: denied-request rate (2m window) above 10 for 2m.
      - alert: HaproxyFrontendSecurityBlockedRequests(v1)
        expr: 'sum by (frontend) (rate(haproxy_frontend_requests_denied_total[2m])) > 10'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: HAProxy frontend security blocked requests (v1) (instance {{ $labels.instance }})
          description: "HAProxy is blocking requests for security reason\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Per series (no aggregation): more than 2 additional check failures within 1m.
      - alert: HaproxyServerHealthcheckFailure(v1)
        expr: 'increase(haproxy_server_check_failures_total[1m]) > 2'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: HAProxy server healthcheck failure (v1) (instance {{ $labels.instance }})
          description: "Some server healthcheck are failing on {{ $labels.server }} ({{ $value }} in the last 1m)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"