diff --git a/README.md b/README.md
index 0d3a44b..b912c0b 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
 - [Keycloak](https://samber.github.io/awesome-prometheus-alerts/rules#keycloak)
 - [Cloudflare](https://samber.github.io/awesome-prometheus-alerts/rules#cloudflare)
 - [SNMP](https://samber.github.io/awesome-prometheus-alerts/rules#snmp)
+- [WireGuard](https://samber.github.io/awesome-prometheus-alerts/rules#wireguard)
 
 #### Cloud providers
 
diff --git a/_data/rules.yml b/_data/rules.yml
index 48cc40d..3bbbdf9 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -3871,6 +3871,35 @@ groups:
                 severity: info
                 comments: sysUpTime is in centiseconds (hundredths of a second).
 
+      - name: WireGuard
+        exporters:
+          - name: MindFlavor/prometheus_wireguard_exporter
+            slug: mindflavor-prometheus-wireguard-exporter
+            doc_url: https://github.com/MindFlavor/prometheus_wireguard_exporter
+            rules:
+              - name: WireGuard peer handshake too old
+                description: "WireGuard peer {{ $labels.public_key }} on interface {{ $labels.interface }} has not had a handshake for over 5 minutes. The tunnel may be down."
+                query: '(time() - wireguard_latest_handshake_seconds) > 300 and wireguard_latest_handshake_seconds > 0'
+                severity: warning
+                for: 2m
+                comments: |
+                  The threshold of 300 seconds (5 minutes) is a rough default. Peers that carry traffic or set PersistentKeepalive re-handshake about every 2 minutes.
+                  Idle peers without PersistentKeepalive do not re-handshake and will trigger this alert: enable keepalive, filter them out, or raise the threshold.
+                  The `> 0` guard excludes peers that have never completed a handshake (covered by a separate rule).
+              - name: WireGuard peer handshake never established
+                description: "WireGuard peer {{ $labels.public_key }} on interface {{ $labels.interface }} has never completed a handshake. Check peer configuration and network connectivity."
+                query: 'wireguard_latest_handshake_seconds == 0'
+                severity: critical
+                for: 5m
+              - name: WireGuard no traffic on peer
+                description: "WireGuard peer {{ $labels.public_key }} on interface {{ $labels.interface }} has had no traffic for 15 minutes despite an active handshake."
+                query: '(rate(wireguard_sent_bytes_total[15m]) + rate(wireguard_received_bytes_total[15m])) == 0 and wireguard_latest_handshake_seconds > 0 and (time() - wireguard_latest_handshake_seconds) < 300'
+                severity: warning
+                for: 15m
+                comments: |
+                  This alert fires when a peer has a recent handshake but zero traffic flow. May indicate routing issues or a misconfigured allowed-ips.
+                  Handshake and keepalive packets may themselves be counted in the byte counters; verify that this expression can match on your setup.
+                  Only useful if you expect continuous traffic on all peers.
 
   - name: Cloud providers
     services: