diff --git a/README.md b/README.md
index 37cea8b..821c3d2 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,7 @@ Collection available here: **[https://awesome-prometheus-alerts.grep.to](https:/
- [Loki](https://awesome-prometheus-alerts.grep.to/rules#loki)
- [Promtail](https://awesome-prometheus-alerts.grep.to/rules#promtail)
- [Cortex](https://awesome-prometheus-alerts.grep.to/rules#cortex)
+- [Jenkins](https://awesome-prometheus-alerts.grep.to/rules#jenkins)
## 🤝 Contributing
diff --git a/_data/rules.yml b/_data/rules.yml
index acf067b..a89bbe4 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -33,6 +33,10 @@ groups:
description: Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.
query: 'changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 2'
severity: warning
+ - name: Prometheus AlertManager job missing
+ description: A Prometheus AlertManager job has disappeared
+ query: 'absent(up{job="alertmanager"})'  # absent() returns a result only when no `up` series has job="alertmanager"
+ severity: warning
- name: Prometheus AlertManager configuration reload failure
description: AlertManager configuration reload error
query: 'alertmanager_config_last_reload_successful != 1'
@@ -494,12 +498,12 @@ groups:
severity: critical
- name: MySQL too many connections (> 80%)
description: 'More than 80% of MySQL connections are in use on {{ $labels.instance }}'
- query: 'avg by (instance) (rate(mysql_global_status_threads_connected[1m])) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80'
+ query: 'max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections * 100 > 80'  # highest connected-thread sample of the last minute, as a percentage of max_connections
severity: warning
for: 2m
- name: MySQL high threads running
description: 'More than 60% of MySQL connections are in running state on {{ $labels.instance }}'
- query: 'avg by (instance) (rate(mysql_global_status_threads_running[1m])) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 60'
+ query: 'max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections * 100 > 60'  # highest running-thread sample of the last minute, as a percentage of max_connections
severity: warning
for: 2m
- name: MySQL Slave IO thread not running
@@ -2126,3 +2130,41 @@ groups:
query: sum by (job) (cortex_query_frontend_queue_length) > 0
severity: critical
for: 5m
+
+ - name: Jenkins  # alert rules for Jenkins, based on metrics from the Prometheus metrics plugin
+   exporters:
+     - name: Metric plugin
+       doc_url: https://plugins.jenkins.io/prometheus/
+       rules:
+         - name: Jenkins offline
+           description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+           query: 'jenkins_node_offline_value > 1'  # NOTE(review): "> 1" ignores a single offline node - confirm this threshold is intended
+           severity: critical
+         - name: Jenkins healthcheck
+           description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+           query: 'jenkins_health_check_score < 1'
+           severity: critical
+         - name: Jenkins builds health score
+           description: "Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+           query: 'default_jenkins_builds_health_score < 1'
+           severity: critical
+         - name: Jenkins run failure total
+           description: "Job run failures: ({{$value}}) {{$labels.jenkins_job}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})"
+           query: 'increase(jenkins_runs_failure_total[1h]) > 100'  # increase() handles counter resets; delta() is intended for gauges only
+           severity: warning
+         - name: Jenkins build tests failing
+           description: "Last build tests failed: {{$labels.jenkins_job}}. Failed build Tests for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
+           query: 'default_jenkins_builds_last_build_tests_failing > 0'
+           severity: warning
+         - name: Jenkins last build failed
+           description: "Last build failed: {{$labels.jenkins_job}}. Failed build for job `{{$labels.jenkins_job}}` on {{$labels.instance}}/{{$labels.env}} ({{$labels.region}})"
+           query: 'default_jenkins_builds_last_build_result_ordinal == 2'  # ordinal 2 is FAILURE, per the table in `comments` below
+           severity: warning
+           comments: |
+             * RUNNING -1 true - The build is still in progress.
+             * SUCCESS 0 true - The build had no errors.
+             * UNSTABLE 1 true - The build had some errors but they were not fatal. For example, some tests failed.
+             * FAILURE 2 false - The build had a fatal error.
+             * NOT_BUILT 3 false - The module was not built.
+             * ABORTED 4 false - The build was manually aborted.
+
diff --git a/π.md b/π.md
index cf831cd..dabd23c 100644
--- a/π.md
+++ b/π.md
@@ -3,5 +3,5 @@
🇺🇦 Forbidden to Russian people.
- Please come back as soon as pease returns to Eastern Europe. 🇺🇦 🤝 🇷🇺
+ Please come back as soon as peace returns to Eastern Europe. 🇺🇦 🤝 🇷🇺