Improve Jenkins node alerts to better handle servers with multiple nodes (#484)

This commit is contained in:
Simon Matic Langford 2025-11-17 13:56:04 +00:00 committed by GitHub
parent d6589237e1
commit 79f2858037
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 21 additions and 7 deletions

View file

@ -3259,9 +3259,14 @@ groups:
slug: metric-plugin
doc_url: https://plugins.jenkins.io/prometheus/
rules:
- name: Jenkins offline
description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
query: "jenkins_node_offline_value > 1"
- name: Jenkins node offline
description: "At least one Jenkins node offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
query: "jenkins_node_offline_value > 0"
severity: critical
for: 5m
- name: Jenkins no node online
description: "No Jenkins nodes are online: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
query: "jenkins_node_online_value == 0"
severity: critical
- name: Jenkins healthcheck
description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"

View file

@ -5,14 +5,23 @@ groups:
rules:
- alert: JenkinsOffline
expr: 'jenkins_node_offline_value > 1'
- alert: JenkinsNodeOffline
expr: 'jenkins_node_offline_value > 0'
for: 5m
labels:
severity: critical
annotations:
summary: Jenkins node offline (instance {{ $labels.instance }})
description: "At least one Jenkins node offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: JenkinsNoNodeOnline
expr: 'jenkins_node_online_value == 0'
for: 0m
labels:
severity: critical
annotations:
summary: Jenkins offline (instance {{ $labels.instance }})
description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
summary: Jenkins no node online (instance {{ $labels.instance }})
description: "No Jenkins nodes are online: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: JenkinsHealthcheck
expr: 'jenkins_health_check_score < 1'