mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 00:47:18 +08:00
feat: add Keycloak alerting rules (aerogear/keycloak-metrics-spi) (#517)
* feat: add Keycloak alerting rules (aerogear/keycloak-metrics-spi) * fix: correct Keycloak metrics-spi metric names and query grouping
This commit is contained in:
parent
258220b4f0
commit
88e2c19017
2 changed files with 46 additions and 0 deletions
|
|
@ -116,6 +116,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
|
|||
- [CoreDNS](https://samber.github.io/awesome-prometheus-alerts/rules#coredns)
|
||||
- [FreeSwitch](https://samber.github.io/awesome-prometheus-alerts/rules#freeswitch)
|
||||
- [Hashicorp Vault](https://samber.github.io/awesome-prometheus-alerts/rules#hashicorp-vault)
|
||||
- [Keycloak](https://samber.github.io/awesome-prometheus-alerts/rules#keycloak)
|
||||
- [Cloudflare](https://samber.github.io/awesome-prometheus-alerts/rules#cloudflare)
|
||||
- [SNMP](https://samber.github.io/awesome-prometheus-alerts/rules#snmp)
|
||||
|
||||
|
|
|
|||
|
|
@ -3578,6 +3578,51 @@ groups:
|
|||
query: "sum(vault_core_active) / count(vault_core_active) <= 0.5"
|
||||
severity: critical
|
||||
|
||||
- name: Keycloak
|
||||
exporters:
|
||||
- name: aerogear/keycloak-metrics-spi
|
||||
slug: aerogear-keycloak-metrics-spi
|
||||
doc_url: https://github.com/aerogear/keycloak-metrics-spi
|
||||
rules:
|
||||
- name: Keycloak high login failure rate
|
||||
description: "More than 5% of login attempts are failing in realm {{ $labels.realm }} (current value: {{ $value | printf \"%.1f\" }}%)."
|
||||
# Per-realm failure percentage: failed / (successful + failed) login rate over 5m, compared against 5.
# The trailing `and (...) > 0` clause drops realms with no login attempts in the window.
query: '(sum by (realm) (rate(keycloak_failed_login_attempts_total[5m])) / (sum by (realm) (rate(keycloak_logins_total[5m])) + sum by (realm) (rate(keycloak_failed_login_attempts_total[5m])))) * 100 > 5 and (sum by (realm) (rate(keycloak_logins_total[5m])) + sum by (realm) (rate(keycloak_failed_login_attempts_total[5m]))) > 0'
|
||||
severity: warning
|
||||
for: 5m
|
||||
comments: |
|
||||
Threshold of 5% is a rough default. Adjust based on your user base and expected error rates.
|
||||
A spike in failed logins may indicate a brute-force attack or misconfigured client.
|
||||
- name: Keycloak no successful logins
|
||||
description: "No successful logins in realm {{ $labels.realm }} for the last 15 minutes."
|
||||
# Per realm: the 15m rate of keycloak_logins_total is zero while the combined rate of
# logins and failed login attempts is positive, i.e. attempts were seen but none succeeded.
# NOTE(review): a single failed attempt on an otherwise idle realm satisfies this expression,
# and the 15m window plus `for: 5m` means the alert reflects roughly 20 minutes — confirm this
# is acceptable for low-traffic realms at `critical` severity.
query: 'sum by (realm) (rate(keycloak_logins_total[15m])) == 0 and (sum by (realm) (rate(keycloak_logins_total[15m])) + sum by (realm) (rate(keycloak_failed_login_attempts_total[15m]))) > 0'
|
||||
severity: critical
|
||||
for: 5m
|
||||
comments: Only fires when login attempts exist but none succeed — may indicate an authentication outage.
|
||||
- name: Keycloak high token refresh error rate
|
||||
description: "More than 10% of token refresh attempts are failing in realm {{ $labels.realm }} (current value: {{ $value | printf \"%.1f\" }}%)."
|
||||
# keycloak_refresh_tokens_total counts successful refreshes only; failures are recorded in
# keycloak_refresh_tokens_errors_total. Attempts are therefore successes + errors, so the
# percentage stays within 0-100 and the alert still fires when every refresh fails.
# The trailing `and (...) > 0` clause drops realms with no refresh attempts in the window.
query: '(sum by (realm) (rate(keycloak_refresh_tokens_errors_total[5m])) / (sum by (realm) (rate(keycloak_refresh_tokens_total[5m])) + sum by (realm) (rate(keycloak_refresh_tokens_errors_total[5m])))) * 100 > 10 and (sum by (realm) (rate(keycloak_refresh_tokens_total[5m])) + sum by (realm) (rate(keycloak_refresh_tokens_errors_total[5m]))) > 0'
|
||||
severity: warning
|
||||
for: 5m
|
||||
comments: Threshold of 10% is a rough default. High refresh token errors may indicate expired sessions or token store issues.
|
||||
- name: Keycloak high code-to-token exchange error rate
|
||||
description: "More than 10% of code-to-token exchanges are failing in realm {{ $labels.realm }} (current value: {{ $value | printf \"%.1f\" }}%)."
|
||||
# keycloak_code_to_tokens_total counts successful exchanges only; failures are recorded in
# keycloak_code_to_tokens_errors_total. Attempts are therefore successes + errors, so the
# percentage stays within 0-100 and the alert still fires when every exchange fails.
# The trailing `and (...) > 0` clause drops realms with no exchange attempts in the window.
query: '(sum by (realm) (rate(keycloak_code_to_tokens_errors_total[5m])) / (sum by (realm) (rate(keycloak_code_to_tokens_total[5m])) + sum by (realm) (rate(keycloak_code_to_tokens_errors_total[5m])))) * 100 > 10 and (sum by (realm) (rate(keycloak_code_to_tokens_total[5m])) + sum by (realm) (rate(keycloak_code_to_tokens_errors_total[5m]))) > 0'
|
||||
severity: warning
|
||||
for: 5m
|
||||
comments: Threshold of 10% is a rough default. Code-to-token failures may indicate misconfigured OAuth clients or replay attacks.
|
||||
- name: Keycloak high registration failure rate
|
||||
description: "More than 10% of registration attempts are failing in realm {{ $labels.realm }} (current value: {{ $value | printf \"%.1f\" }}%)."
|
||||
# keycloak_registrations_total counts successful registrations only; failures are recorded in
# keycloak_registrations_errors_total. Attempts are therefore successes + errors, so the
# percentage stays within 0-100 and the alert still fires when every registration fails.
# The trailing `and (...) > 0` clause drops realms with no registration attempts in the window.
query: '(sum by (realm) (rate(keycloak_registrations_errors_total[5m])) / (sum by (realm) (rate(keycloak_registrations_total[5m])) + sum by (realm) (rate(keycloak_registrations_errors_total[5m])))) * 100 > 10 and (sum by (realm) (rate(keycloak_registrations_total[5m])) + sum by (realm) (rate(keycloak_registrations_errors_total[5m]))) > 0'
|
||||
severity: warning
|
||||
for: 5m
|
||||
comments: Threshold of 10% is a rough default.
|
||||
- name: Keycloak slow request response time
|
||||
description: "Keycloak {{ $labels.method }} requests are taking more than 2 seconds on average."
|
||||
# Mean request duration per HTTP method over 5m (rate of _sum divided by rate of _count).
# keycloak-metrics-spi observes keycloak_request_duration in milliseconds (bucket bounds
# 50 ... 30000), so the 2-second threshold is expressed as 2000.
# The trailing `and ... > 0` clause drops methods with no requests in the window.
query: 'sum by (method) (rate(keycloak_request_duration_sum[5m])) / sum by (method) (rate(keycloak_request_duration_count[5m])) > 2000 and sum by (method) (rate(keycloak_request_duration_count[5m])) > 0'
|
||||
severity: warning
|
||||
for: 5m
|
||||
comments: Threshold of 2 seconds is a rough default. Adjust based on your performance requirements.
|
||||
|
||||
- name: Cloudflare
|
||||
exporters:
|
||||
- name: lablabs/cloudflare-exporter
|
||||
|
|
|
|||
Loading…
Reference in a new issue