# Source: awesome-prometheus-alerts/dist/rules/jenkins/metric-plugin.yml
# Commit: ccf24bcf03 "Publish" by Samuel Berthe
# Date: 2022-06-15 00:08:51 +00:00
#
# Size: 68 lines, 3 KiB
# Language: YAML

# Prometheus alerting rules for Jenkins metrics.
#
# Every rule attaches a `severity` label and two annotations (`summary`,
# `description`). The descriptions reference the `instance`, `realm`, `env`,
# `region` and `jenkins_job` labels; a label that is absent on the matched
# series simply renders as an empty string in the notification.
#
# `for: 0m` means a rule fires on the first evaluation where its expression
# returns a result, with no pending period.
groups:
  - name: MetricPlugin
    rules:
      # Fires when jenkins_node_offline_value is strictly greater than 1.
      # NOTE(review): a value of exactly 1 does not alert; confirm whether the
      # intended threshold is `> 0`.
      - alert: JenkinsOffline
        expr: 'jenkins_node_offline_value > 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Jenkins offline (instance {{ $labels.instance }})'
          description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when jenkins_health_check_score drops below 1.
      - alert: JenkinsHealthcheck
        expr: 'jenkins_health_check_score < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Jenkins healthcheck (instance {{ $labels.instance }})'
          description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the per-instance sum of jenkins_plugins_withUpdate has been
      # above 3 continuously for one day. The aggregation keeps only the
      # `instance` label, which is the only label the annotations rely on.
      - alert: JenkinsOutdatedPlugins
        expr: 'sum(jenkins_plugins_withUpdate) by (instance) > 3'
        for: 1d
        labels:
          severity: warning
        annotations:
          summary: 'Jenkins outdated plugins (instance {{ $labels.instance }})'
          description: "{{ $value }} plugins need update\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when default_jenkins_builds_health_score drops below 1.
      # NOTE(review): the description wording ("Healthcheck failure") matches
      # the JenkinsHealthcheck rule; confirm it is the intended message here.
      - alert: JenkinsBuildsHealthScore
        expr: 'default_jenkins_builds_health_score < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Jenkins builds health score (instance {{ $labels.instance }})'
          description: "Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when jenkins_runs_failure_total grew by more than 100 over the
      # last hour. The `_total` suffix marks the metric as a counter, so
      # increase() is used: delta() is meant for gauges and does not compensate
      # for counter resets, which would understate (or negate) the failure
      # count whenever the counter restarts inside the window.
      # NOTE(review): the description still says "Healthcheck failure";
      # confirm whether that wording is intended for this alert.
      - alert: JenkinsRunFailureTotal
        expr: 'increase(jenkins_runs_failure_total[1h]) > 100'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Jenkins run failure total (instance {{ $labels.instance }})'
          description: "Job run failures: ({{$value}}) {{$labels.jenkins_job}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when default_jenkins_builds_last_build_tests_failing is above 0.
      - alert: JenkinsBuildTestsFailing
        expr: 'default_jenkins_builds_last_build_tests_failing > 0'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Jenkins build tests failing (instance {{ $labels.instance }})'
          description: "Last build tests failed: {{$labels.jenkins_job}}. Failed build Tests for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when default_jenkins_builds_last_build_result_ordinal equals 2.
      # NOTE(review): presumably ordinal 2 corresponds to a FAILURE build
      # result; verify against the exporter's result-ordinal mapping.
      - alert: JenkinsLastBuildFailed
        expr: 'default_jenkins_builds_last_build_result_ordinal == 2'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Jenkins last build failed (instance {{ $labels.instance }})'
          description: "Last build failed: {{$labels.jenkins_job}}. Failed build for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"