added jenkins alert rules

This commit is contained in:
MikeN. Paxos 2021-12-20 14:08:38 -05:00
parent fd0f2805c0
commit 8d12a63ea1
No known key found for this signature in database
GPG key ID: 637249C418634DA7

View file

@ -2126,3 +2126,51 @@ groups:
query: sum by (job) (cortex_query_frontend_queue_length) > 0
severity: critical
for: 5m
# Alert rules built on Jenkins metrics. Each rule below declares a name, a
# PromQL query, a severity, summary/description templates and a jenkins_url
# link back to the affected instance or job.
- name: Jenkins Metrics (Plugin)
exporters:
- rules:
# Critical alert raised when jenkins_node_offline_value is greater than 1.
# NOTE(review): "> 1" never fires for a value of exactly 1. If the metric is a
# count of offline nodes, a single offline node is silently ignored - confirm
# whether "> 0" was intended.
- name: JenkinsOffline
query: 'jenkins_node_offline_value > 1'
severity: critical
summary: "Jenkins OFFLINE"
description: "Jenkins Offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
jenkins_url: "https://{{$labels.instance}}/"
# Critical alert raised whenever jenkins_health_check_score drops below 1.
# The summary carries the current score ({{$value}}); jenkins_url links to the
# root page of the reporting instance.
- name: JenkinsHealthCheck
query: 'jenkins_health_check_score < 1'
severity: critical
summary: "Jenkins Health Check Score: {{$value}}"
description: "Health Check Failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
jenkins_url: "https://{{$labels.instance}}/"
# Critical alert raised when a job's default_jenkins_builds_health_score drops
# below 1. The summary and description name the job so this alert can be told
# apart from the instance-level JenkinsHealthCheck alert; jenkins_url opens a
# Jenkins search for the job.
- name: JenkinsBuildsHealthScore
query: 'default_jenkins_builds_health_score{} < 1'
severity: critical
summary: "Jenkins Builds Health Score: {{$value}} ({{$labels.jenkins_job}})"
description: "Builds Health Score Failure for Job `{{$labels.jenkins_job}}` on `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
jenkins_url: "https://{{$labels.instance}}/search/?q={{$labels.jenkins_job}}"
# Warning raised when more than 100 job runs failed within the last hour.
# jenkins_runs_failure_total is a counter, so the hourly growth is computed
# with increase(), which accounts for counter resets (e.g. a Jenkins restart);
# delta() is meant for gauges and yields wrong values across a reset.
- name: JenkinsRunFailureTotal
query: 'increase(jenkins_runs_failure_total{}[1h]) > 100'
severity: warning
summary: "Job Run Failures: ({{$value}}) {{$labels.jenkins_job}}"
description: "More than 100 failed job runs in the last hour on `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
jenkins_url: "https://{{$labels.instance}}/search/?q={{$labels.jenkins_job}}"
# Warning raised when default_jenkins_builds_last_build_tests_failing is above
# 0 for a job. jenkins_url opens a Jenkins search for the job name taken from
# the jenkins_job label.
- name: JenkinsBuildTestsFailing
query: 'default_jenkins_builds_last_build_tests_failing > 0'
severity: warning
summary: "Last Build Tests Failed: {{$labels.jenkins_job}}"
description: "FAILED Build Tests for Job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
jenkins_url: "https://{{$labels.instance}}/search/?q={{$labels.jenkins_job}}"
# Warning raised when the last build of a job ended with result ordinal 2,
# which the table in `comments` below lists as FAILURE. Other ordinals
# (UNSTABLE, NOT_BUILT, ABORTED) do not trigger this rule.
- name: JenkinsLastBuildFailed
query: 'default_jenkins_builds_last_build_result_ordinal{} == 2'
severity: warning
summary: "Last Build Failed: {{$labels.jenkins_job}}"
description: "FAILED Build for Job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
jenkins_url: "https://{{$labels.instance}}/search/?q={{$labels.jenkins_job}}"
comments: |
* RUNNING -1 true - The build is still running; no result is available yet.
* SUCCESS 0 true - The build had no errors.
* UNSTABLE 1 true - The build had some errors but they were not fatal. For example, some tests failed.
* FAILURE 2 false - The build had a fatal error.
* NOT_BUILT 3 false - The module was not built.
* ABORTED 4 false - The build was manually aborted.