From 1f7712b3322a2c6e0ba241d0ad7c8127c61b859f Mon Sep 17 00:00:00 2001
From: Anton Markelov
Date: Mon, 1 Feb 2021 10:48:43 +0200
Subject: [PATCH 1/2] add alerts for dabealu/zookeeper-exporter

---
# Review notes (free text after the `---` separator; not part of the commit
# message and not part of the diff):
# - Adds a second exporter entry, dabealu/zookeeper-exporter, directly after
#   the existing cloudflare/kafka_zookeeper_exporter entry, with four rules:
#     Zookeeper Down              zk_up == 0                    critical
#     Zookeeper missing leader    sum(zk_server_leader) == 0    critical
#     Zookeeper Too Many Leaders  sum(zk_server_leader) > 1     critical
#     Zookeeper Not Ok            zk_ruok == 0                  high
#   Every rule in this patch carries `for: 3m`.
# - NOTE(review): both leader rules sum zk_server_leader with no grouping
#   label, so they yield one value over every series of that metric.
#   Presumably a per-ensemble grouping is needed if more than one ensemble
#   is scraped -- confirm against the exporter's labels.
# - NOTE(review): the leading whitespace inside the hunk was reconstructed
#   from conventional 2-space YAML nesting; verify it against
#   _data/rules.yml before applying.
 _data/rules.yml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index c9d1b9e..7ceeb9d 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1090,6 +1090,29 @@ groups:
           - name: cloudflare/kafka_zookeeper_exporter
             doc_url: https://github.com/cloudflare/kafka_zookeeper_exporter
             rules:
+          - name: dabealu/zookeeper-exporter
+            doc_url: https://github.com/dabealu/zookeeper-exporter
+            rules:
+              - name: Zookeeper Down
+                description: "Zookeeper down on instance {{ $labels.instance }}"
+                query: 'zk_up == 0'
+                severity: critical
+                for: 3m
+              - name: Zookeeper missing leader
+                description: "Zookeeper cluster has no node marked as leader"
+                query: 'sum(zk_server_leader) == 0'
+                severity: critical
+                for: 3m
+              - name: Zookeeper Too Many Leaders
+                description: "Zookeeper cluster has too many nodes marked as leader"
+                query: 'sum(zk_server_leader) > 1'
+                severity: critical
+                for: 3m
+              - name: Zookeeper Not Ok
+                description: "Zookeeper instance is not ok"
+                query: 'zk_ruok == 0'
+                severity: high
+                for: 3m

       - name: Kafka
         exporters:

From 040cbe1acef7a51ee9a561d503eced11b7704d0e Mon Sep 17 00:00:00 2001
From: Anton Markelov
Date: Tue, 2 Feb 2021 14:19:32 +0200
Subject: [PATCH 2/2] add suggested changes

---
# Review notes (free text after the `---` separator; not part of the commit
# message and not part of the diff):
# - Applies on top of PATCH 1/2: the hunk starts at the `description:` line
#   of the "Zookeeper Down" rule that patch 1 added.
# - Drops `for: 3m` from "Zookeeper Down", "Zookeeper missing leader" and
#   "Zookeeper Too Many Leaders".
# - Changes the severity of "Zookeeper Not Ok" from `high` to `warning`;
#   that rule keeps its `for: 3m`.
# - NOTE(review): the leading whitespace inside the hunk was reconstructed
#   from conventional 2-space YAML nesting; verify it against
#   _data/rules.yml before applying.
 _data/rules.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 7ceeb9d..df2eb78 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1097,21 +1097,18 @@ groups:
                 description: "Zookeeper down on instance {{ $labels.instance }}"
                 query: 'zk_up == 0'
                 severity: critical
-                for: 3m
               - name: Zookeeper missing leader
                 description: "Zookeeper cluster has no node marked as leader"
                 query: 'sum(zk_server_leader) == 0'
                 severity: critical
-                for: 3m
               - name: Zookeeper Too Many Leaders
                 description: "Zookeeper cluster has too many nodes marked as leader"
                 query: 'sum(zk_server_leader) > 1'
                 severity: critical
-                for: 3m
               - name: Zookeeper Not Ok
                 description: "Zookeeper instance is not ok"
                 query: 'zk_ruok == 0'
-                severity: high
+                severity: warning
                 for: 3m

       - name: Kafka