added jenkins alert rules for jenkins metrics plugin (#268)

* added jenkins alert rules

* Update rules.yml

Co-authored-by: Samuel Berthe <dev@samuel-berthe.fr>
This commit is contained in:
MikeN. Paxos 2021-12-27 06:48:07 -05:00 committed by GitHub
parent fd0f2805c0
commit 78a7e61050
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2126,3 +2126,41 @@ groups:
query: sum by (job) (cortex_query_frontend_queue_length) > 0
severity: critical
for: 5m
# Jenkins alert rules for metrics scraped through the Jenkins Prometheus plugin.
# NOTE(review): the descriptions reference `realm`, `env` and `region` labels,
# which presumably come from site-specific relabeling - confirm they exist.
- name: Jenkins
  exporters:
    - name: Metric plugin
      doc_url: https://plugins.jenkins.io/prometheus/
      rules:
        # NOTE(review): `> 1` only fires once the gauge exceeds 1; confirm
        # that `> 0` (any offline node) is not what was intended.
        - name: Jenkins offline
          description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
          query: 'jenkins_node_offline_value > 1'
          severity: critical
        # Fires when the overall health check score drops below 1.
        - name: Jenkins healthcheck
          description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
          query: 'jenkins_health_check_score < 1'
          severity: critical
        # Separate metric from the health check above, so the description
        # names the builds health score instead of a healthcheck failure.
        - name: Jenkins builds health score
          description: "Builds health score: {{$value}}. Low builds health score for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
          query: 'default_jenkins_builds_health_score < 1'
          severity: critical
        # `increase()` rather than `delta()`: the `_total` metric is a counter,
        # and `increase()` accounts for counter resets.
        - name: Jenkins run failure total
          description: "Job run failures: ({{$value}}) {{$labels.jenkins_job}}. Run failures for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
          query: 'increase(jenkins_runs_failure_total[1h]) > 100'
          severity: warning
        # Named in the same spaced style as the sibling rules.
        - name: Jenkins build tests failing
          description: "Last build tests failed: {{$labels.jenkins_job}}. Failed build Tests for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
          query: 'default_jenkins_builds_last_build_tests_failing > 0'
          severity: warning
        # 2 is the FAILURE ordinal; see the table in `comments` below.
        - name: Jenkins last build failed
          description: "Last build failed: {{$labels.jenkins_job}}. Failed build for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
          query: 'default_jenkins_builds_last_build_result_ordinal == 2'
          severity: warning
          comments: |
            * RUNNING -1 true - The build is still in progress.
            * SUCCESS 0 true - The build had no errors.
            * UNSTABLE 1 true - The build had some errors but they were not fatal. For example, some tests failed.
            * FAILURE 2 false - The build had a fatal error.
            * NOT_BUILT 3 false - The module was not built.
            * ABORTED 4 false - The build was manually aborted.