mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 00:47:18 +08:00
Improve Jenkins node alerts to better handle servers with multiple nodes (#484)
This commit is contained in:
parent
d6589237e1
commit
79f2858037
2 changed files with 21 additions and 7 deletions
|
|
@@ -3259,9 +3259,14 @@ groups:
|
|||
slug: metric-plugin
|
||||
doc_url: https://plugins.jenkins.io/prometheus/
|
||||
rules:
|
||||
- name: Jenkins offline
|
||||
description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
|
||||
query: "jenkins_node_offline_value > 1"
|
||||
- name: Jenkins node offline
|
||||
description: "At least one Jenkins node offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
|
||||
query: "jenkins_node_offline_value > 0"
|
||||
severity: critical
|
||||
for: 5m
|
||||
- name: Jenkins no node online
|
||||
description: "No Jenkins nodes are online: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
|
||||
query: "jenkins_node_online_value == 0"
|
||||
severity: critical
|
||||
- name: Jenkins healthcheck
|
||||
description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
|
||||
|
|
|
|||
17
dist/rules/jenkins/metric-plugin.yml
vendored
17
dist/rules/jenkins/metric-plugin.yml
vendored
|
|
@@ -5,14 +5,23 @@ groups:
|
|||
|
||||
rules:
|
||||
|
||||
- alert: JenkinsOffline
|
||||
expr: 'jenkins_node_offline_value > 1'
|
||||
- alert: JenkinsNodeOffline
|
||||
expr: 'jenkins_node_offline_value > 0'
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Jenkins node offline (instance {{ $labels.instance }})
|
||||
description: "At least one Jenkins node offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: JenkinsNoNodeOnline
|
||||
expr: 'jenkins_node_online_value == 0'
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Jenkins offline (instance {{ $labels.instance }})
|
||||
description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
summary: Jenkins no node online (instance {{ $labels.instance }})
|
||||
description: "No Jenkins nodes are online: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: JenkinsHealthcheck
|
||||
expr: 'jenkins_health_check_score < 1'
|
||||
|
|
|
|||
Loading…
Reference in a new issue