diff --git a/_data/rules.yml b/_data/rules.yml index bf2ee22..f4c66c1 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -3259,9 +3259,14 @@ groups: slug: metric-plugin doc_url: https://plugins.jenkins.io/prometheus/ rules: - - name: Jenkins offline - description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})" - query: "jenkins_node_offline_value > 1" + - name: Jenkins node offline + description: "At least one Jenkins node offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})" + query: "jenkins_node_offline_value > 0" + severity: critical + for: 5m + - name: Jenkins no node online + description: "No Jenkins nodes are online: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})" + query: "jenkins_node_online_value == 0" severity: critical - name: Jenkins healthcheck description: "Jenkins healthcheck score: {{$value}}. Healthcheck failure for `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})" diff --git a/dist/rules/jenkins/metric-plugin.yml b/dist/rules/jenkins/metric-plugin.yml index 57c9cf6..5271e1e 100644 --- a/dist/rules/jenkins/metric-plugin.yml +++ b/dist/rules/jenkins/metric-plugin.yml @@ -5,14 +5,23 @@ groups: rules: - - alert: JenkinsOffline - expr: 'jenkins_node_offline_value > 1' + - alert: JenkinsNodeOffline + expr: 'jenkins_node_offline_value > 0' + for: 5m + labels: + severity: critical + annotations: + summary: Jenkins node offline (instance {{ $labels.instance }}) + description: "At least one Jenkins node offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: JenkinsNoNodeOnline + expr: 'jenkins_node_online_value == 0' for: 0m labels: severity: critical annotations: - summary: Jenkins offline (instance {{ $labels.instance }}) - description: "Jenkins offline: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + summary: Jenkins no node online (instance {{ $labels.instance }}) + description: "No Jenkins nodes are online: `{{$labels.instance}}` in realm {{$labels.realm}}/{{$labels.env}} ({{$labels.region}})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: JenkinsHealthcheck expr: 'jenkins_health_check_score < 1'