From 78a7e61050d5a7a07f24a89270b2bba8512db9a7 Mon Sep 17 00:00:00 2001
From: "MikeN. Paxos" <84518874+mike2194@users.noreply.github.com>
Date: Mon, 27 Dec 2021 06:48:07 -0500
Subject: [PATCH] added jenkins alert rules for jenkins metrics plugin (#268)

* added jenkins alert rules

* Update rules.yml

Co-authored-by: Samuel Berthe
---
 _data/rules.yml | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index acf067b..1f98cb6 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2126,3 +2126,43 @@ groups:
                 query: sum by (job) (cortex_query_frontend_queue_length) > 0
                 severity: critical
                 for: 5m
+
+      # Alert rules built on the metrics exposed by the Jenkins Prometheus metrics plugin (see doc_url).
+      - name: Jenkins
+        exporters:
+          - name: Metric plugin
+            doc_url: https://plugins.jenkins.io/prometheus/
+            rules:
+              - name: Jenkins offline
+                description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+                # NOTE(review): fires only when the value is strictly greater than 1 - confirm a value of exactly 1 should not alert.
+                query: 'jenkins_node_offline_value > 1'
+                severity: critical
+              - name: Jenkins healthcheck
+                description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+                query: 'jenkins_health_check_score < 1'
+                severity: critical
+              - name: Jenkins builds health score
+                description: "Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+                query: 'default_jenkins_builds_health_score < 1'
+                severity: critical
+              - name: Jenkins run failure total
+                description: "Job run failures: ({{$value}}) {{$labels.jenkins_job}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+                query: 'delta(jenkins_runs_failure_total[1h]) > 100'
+                severity: warning
+              - name: Jenkins build tests failing
+                description: "Last build tests failed: {{$labels.jenkins_job}}. Failed build Tests for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
+                query: 'default_jenkins_builds_last_build_tests_failing > 0'
+                severity: warning
+              - name: Jenkins last build failed
+                description: "Last build failed: {{$labels.jenkins_job}}. Failed build for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
+                query: 'default_jenkins_builds_last_build_result_ordinal == 2'
+                severity: warning
+                comments: |
+                  * RUNNING -1 true - The build is still in progress.
+                  * SUCCESS 0 true - The build had no errors.
+                  * UNSTABLE 1 true - The build had some errors but they were not fatal. For example, some tests failed.
+                  * FAILURE 2 false - The build had a fatal error.
+                  * NOT_BUILT 3 false - The module was not built.
+                  * ABORTED 4 false - The build was manually aborted.
+