From 6c5f708179bdfee374e0efcf1b27e521e204876f Mon Sep 17 00:00:00 2001 From: Daniel Andrzejewski Date: Thu, 17 Sep 2020 15:13:42 +0200 Subject: [PATCH 1/2] node_disk_write_time_seconds_total is in seconds, not in milliseconds. node_disk_write_time_seconds_total should be greater than 0, otherwise you get +Inf result. --- _data/rules.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index c60ab90..2983754 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -157,11 +157,11 @@ groups: severity: warning - name: Host unusual disk read latency description: Disk latency is growing (read operations > 100ms) - query: "rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 100" + query: "rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m])" severity: warning - name: Host unusual disk write latency description: Disk latency is growing (write operations > 100ms) - query: "rate(node_disk_write_time_seconds_total[1m]) / rate(node_disk_writes_completed_total[1m]) > 100" + query: "rate(node_disk_write_time_seconds_total[1m]) / rate(node_disk_writes_completed_total[1m]) > 0.1 and rate(node_disk_writes_completed_total[1m]) > 0" severity: warning - name: Host high CPU load description: CPU load is > 80% From fc4797db9e523e0f793f0486119e94c3cd7b06e7 Mon Sep 17 00:00:00 2001 From: Daniel Andrzejewski Date: Thu, 17 Sep 2020 15:19:14 +0200 Subject: [PATCH 2/2] small fix --- _data/rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_data/rules.yml b/_data/rules.yml index 2983754..86edb23 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -157,7 +157,7 @@ groups: severity: warning - name: Host unusual disk read latency description: Disk latency is growing (read operations > 100ms) - query: "rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 
and rate(node_disk_reads_completed_total[1m])" + query: "rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m]) > 0" severity: warning - name: Host unusual disk write latency description: Disk latency is growing (write operations > 100ms)