chore: add Prometheus alerts for HAProxy v2

ref #87
This commit is contained in:
Benjamin Dos Santos 2021-01-05 16:45:52 +01:00
parent 209fdf86e8
commit 1b7c36666c
No known key found for this signature in database
GPG key ID: A4EB2BECABD13244

View file

@ -1070,12 +1070,84 @@ groups:
# Alert rules for the Prometheus exporter that is built into HAProxy (>= v2).
# The exporter sources were moved from contrib/prometheus-exporter to
# addons/promex in the HAProxy repository, so the link targets that location.
- name: Embedded exporter (HAProxy >= v2)
  doc_url: https://github.com/haproxy/haproxy/tree/master/addons/promex
  rules:
# Fires when more than 5% of the responses served through a backend during the
# last minute carry a 4xx status.
# Numerator and denominator are both aggregated by `proxy`: a binary operation
# only pairs series with identical label sets, so mixing `by (server)` and
# `by (proxy)` would yield an empty result and the alert could never fire.
# Only the `proxy` label survives the aggregation, hence its use in the text.
- name: HAProxy high HTTP 4xx error rate backend
  description: Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.proxy }}
  query: >-
    ((sum by (proxy) (rate(haproxy_server_http_responses_total{code="4xx"}[1m]))
    / sum by (proxy) (rate(haproxy_server_http_responses_total[1m])))
    * 100) > 5
  severity: critical
  for: 1m
# Fires when more than 5% of the responses served through a backend during the
# last minute carry a 5xx status.
# The rule name now says 5xx to match the description and the query (it was a
# copy of the 4xx rule's name, which also made two rules share one name).
# Both sides of the division are aggregated by `proxy` so the series pair up;
# only the `proxy` label survives the aggregation.
- name: HAProxy high HTTP 5xx error rate backend
  description: Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.proxy }}
  query: >-
    ((sum by (proxy) (rate(haproxy_server_http_responses_total{code="5xx"}[1m]))
    / sum by (proxy) (rate(haproxy_server_http_responses_total[1m])))
    * 100) > 5
  severity: critical
  for: 1m
# Fires when more than 5% of one server's responses during the last minute
# carry a 4xx status.
# The denominator is aggregated by `server`, like the numerator: dividing a
# `by (server)` vector by a `by (proxy)` vector matches no series at all.
- name: HAProxy high HTTP 4xx error rate server
  description: Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}
  query: >-
    ((sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m]))
    / sum by (server) (rate(haproxy_server_http_responses_total[1m])))
    * 100) > 5
  severity: critical
  for: 1m
# Fires when more than 5% of one server's responses during the last minute
# carry a 5xx status.
# The denominator is aggregated by `server`, like the numerator: dividing a
# `by (server)` vector by a `by (proxy)` vector matches no series at all.
- name: HAProxy high HTTP 5xx error rate server
  description: Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}
  query: >-
    ((sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m]))
    / sum by (server) (rate(haproxy_server_http_responses_total[1m])))
    * 100) > 5
  severity: critical
  for: 1m
# Per-server ratio of response errors to HTTP responses over one minute,
# expressed as a percentage and compared against a 5% threshold.
- name: 'HAProxy server response errors'
  description: 'Too many response errors to {{ $labels.server }} server (> 5%).'
  query: >-
    (sum by (server) (rate(haproxy_server_response_errors_total[1m]))
    / sum by (server) (rate(haproxy_server_http_responses_total[1m])))
    * 100 > 5
  severity: 'critical'
  for: '1m'
# Fires when a backend accumulates more than 100 connection errors per second
# (one-minute rate).
# The expression is aggregated by `proxy`, so `proxy` is the only label left
# to interpolate into the description.
- name: HAProxy backend connection errors
  description: Too many connection errors to {{ $labels.proxy }} backend (> 100 req/s). Request throughput may be too high.
  query: (sum by (proxy) (rate(haproxy_backend_connection_errors_total[1m]))) > 100
  severity: critical
  for: 1m
# Fires when a single server accumulates more than 100 connection errors per
# second (one-minute rate).
# Uses the per-server counter aggregated by `server`; the previous expression
# was a copy of the backend rule, which left {{ $labels.server }} empty and
# made this alert a duplicate of "HAProxy backend connection errors".
- name: HAProxy server connection errors
  description: Too many connection errors to {{ $labels.server }} server (> 100 req/s). Request throughput may be too high.
  query: (sum by (server) (rate(haproxy_server_connection_errors_total[1m]))) > 100
  severity: critical
# Warns when, averaged over two minutes, the summed session high-water mark of
# a backend's servers exceeds 80% of their summed session limit.
# A range can only be applied to an arbitrary expression through subquery
# syntax, hence `[2m:]` (default resolution) rather than a bare `[2m]`, which
# is a parse error outside of a plain metric selector.
# The expression is aggregated by `proxy`, the only label left for the text.
- name: HAProxy backend max active session
  description: HAProxy backend {{ $labels.proxy }} is reaching session limit (> 80%).
  query: >-
    avg_over_time(((sum by (proxy) (haproxy_server_max_sessions))
    / (sum by (proxy) (haproxy_server_limit_sessions)))[2m:]) * 100 > 80
  severity: warning
  for: 2m
# Warns when a backend has had queued (pending) requests for two minutes.
# The aggregated expression must be parenthesised: `sum by (proxy) metric`
# without parentheses does not parse.
# Only the `proxy` label survives the aggregation, hence its use in the text.
- name: HAProxy pending requests
  description: Some HAProxy requests are pending on {{ $labels.proxy }} backend
  query: sum by (proxy) (haproxy_backend_current_queue) > 0
  severity: warning
  for: 2m
# Warns when the per-proxy average of haproxy_backend_max_total_time_seconds
# is above 1; the rule carries a one-minute `for` duration.
- name: 'HAProxy HTTP slowing down'
  description: 'Average request time is increasing'
  query: 'avg by (proxy) (haproxy_backend_max_total_time_seconds) > 1'
  severity: 'warning'
  for: '1m'
# Warns when a backend issues more than 10 retry warnings per second
# (one-minute rate) for two minutes.
# The expression is aggregated by `proxy`, so `proxy` is the only label left
# to interpolate into the description.
- name: HAProxy retry high
  description: High rate of retry on {{ $labels.proxy }} backend
  query: sum by (proxy) (rate(haproxy_backend_retry_warnings_total[1m])) > 10
  severity: warning
  for: 2m
# Critical alert raised when haproxy_backend_up reports 0 for a series.
# NOTE(review): confirm the embedded exporter really exposes a metric named
# haproxy_backend_up; it may publish backend state under a different name
# (e.g. haproxy_backend_status) - verify against the exporter's metric list.
- name: 'HAProxy proxy down'
  description: 'HAProxy proxy is down'
  query: 'haproxy_backend_up == 0'
  severity: 'critical'
# Critical alert raised when a backend reports zero active servers.
# The description now states what the query actually checks; it previously
# read "HAProxy backend is down", which contradicted the rule name and did not
# say which backend was affected.
- name: HAProxy server down
  description: HAProxy backend {{ $labels.proxy }} has no active server left
  query: haproxy_backend_active_servers == 0
  severity: critical
# Warns when a frontend denies more than 10 connections per second for two
# minutes.
# rate() needs a range vector and has to be applied to the raw counter before
# aggregating (rate-then-sum); the previous `rate(sum by (...) (counter))` had
# no range selector and does not parse.
- name: HAProxy frontend security blocked requests
  description: HAProxy is blocking requests for security reason
  query: sum by (proxy) (rate(haproxy_frontend_denied_connections_total[2m])) > 10
  severity: warning
  for: 2m
# Warns when a server's health-check failure counter grew during the last
# minute; no aggregation is applied, so the series' own labels (including
# `server`, used in the description) remain available.
- name: 'HAProxy server healthcheck failure'
  description: 'Some server healthcheck are failing on {{ $labels.server }}'
  query: 'increase(haproxy_server_check_failures_total[1m]) > 0'
  severity: 'warning'
  for: '1m'
- name: prometheus/haproxy_exporter (HAProxy < v2)
doc_url: https://github.com/prometheus/haproxy_exporter
rules:
# Critical alert raised when the exporter reports haproxy_up as 0.
# The mapping carried the `query` key twice with the same value; duplicate
# keys are invalid YAML (most parsers silently keep the last one), so a single
# entry is kept.
- name: HAProxy down
  description: HAProxy down
  query: 'haproxy_up == 0'
  severity: critical
- name: HAProxy high HTTP 4xx error rate backend
description: Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}