From ca4fb01c6dda3514048fb28166d3bd9f40b06ef7 Mon Sep 17 00:00:00 2001
From: Samuel Berthe
Date: Fri, 14 Jun 2024 20:15:44 +0200
Subject: [PATCH] Update rules.yml

---
Notes (ignored by git am / git apply):
  * Renames the rule "Host context switching" to "Host context switching high".
  * Replaces the fixed threshold (> 10000 context switches / CPU / s over 5m)
    with a relative one: the per-CPU 15m rate divided by the per-CPU 1d rate
    must exceed 2.
  * The new query no longer joins on node_uname_info, unlike the neighbouring
    rule shown in the hunk context.
  * NOTE(review): the leading indentation of the hunk lines below was
    reconstructed; confirm it against _data/rules.yml before applying.

 _data/rules.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 9a08f71..8994d44 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -240,12 +240,15 @@ groups:
                 query: '(rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
                 severity: warning
                 for: 5m
-              - name: Host context switching
-                description: Context switching is growing on the node (> 10000 / CPU / s)
-                query: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+              - name: Host context switching high
+                description: Context switching is growing on the node (twice the daily average during the last 15m)
+                query: |
+                  (rate(node_context_switches_total[15m])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"}))
+                  /
+                  (rate(node_context_switches_total[1d])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) > 2
                 severity: warning
                 comments: |
-                  10000 context switches is an arbitrary number.
+                  x2 context switches is an arbitrary number.
                   The alert threshold depends on the nature of the application.
                   Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58
               - name: Host swap is filling up