From a7c5155002a887dfe4a288738ca5c8b3e483012a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carl=20D=C3=BCvel?=
Date: Mon, 21 Dec 2020 19:06:45 +0100
Subject: [PATCH] Add cpu steal alert

Add a warning-level rule that fires when the per-instance average of
rate(node_cpu_seconds_total{mode="steal"}[5m]), expressed as a
percentage, exceeds 10.

Steal is the CPU time a virtualized guest spends waiting while the
hypervisor runs something else, so a sustained high value usually points
at contention on the underlying host rather than at the workload itself.

The rule is placed directly after the existing CPU load rule and uses
the same 5m rate window. It is named "Host CPU steal" so that it carries
the same "Host" prefix as the neighbouring "Host context switching"
rule.
---
Reviewer note: the leading whitespace of the hunk lines below was
reconstructed; confirm it matches _data/rules.yml before applying.
The post-image hash on the index line predates the rule rename.

 _data/rules.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 22f22c3..082b690 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -170,6 +170,10 @@ groups:
                 description: CPU load is > 80%
                 query: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80'
                 severity: warning
+              - name: Host CPU steal
+                description: CPU steal is > 10%
+                query: 'avg by(instance) (rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100 > 10'
+                severity: warning
               - name: Host context switching
                 description: Context switching is growing on node (> 1000 / s)
                 query: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000'