From 54e2b09b3dff108f2a3741b520a650ce5d0452ce Mon Sep 17 00:00:00 2001 From: Evi Vanoost Date: Tue, 2 Jul 2024 13:49:12 -0400 Subject: [PATCH] Query fails if instance names are not unique across jobs. This fixes it. --- _data/rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_data/rules.yml b/_data/rules.yml index b7fd69f..0121157 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -27,7 +27,7 @@ groups: severity: critical - name: Prometheus target missing with warmup time description: Allow a job time to start up (10 minutes) before alerting that it's down. - query: "sum by (instance, job) ((up == 0) * on (instance) group_right(job) (node_time_seconds - node_boot_time_seconds > 600))" + query: "sum by (instance, job) ((up == 0) * on (instance) group_left (__name__) (node_time_seconds - node_boot_time_seconds > 600))" severity: critical - name: Prometheus configuration reload failure description: Prometheus configuration reload error