From 18da40f8b4a43b627e5e51a0cbc6a205e08dbbc8 Mon Sep 17 00:00:00 2001
From: alexandrumarian-portal <83924056+alexandrumarian-portal@users.noreply.github.com>
Date: Tue, 14 Feb 2023 15:00:43 +0200
Subject: [PATCH] disk io ops alarm (#337)

* disk io ops alarm

* disk io ops alarm
---
 dist/rules/host-and-hardware/node-exporter.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/dist/rules/host-and-hardware/node-exporter.yml b/dist/rules/host-and-hardware/node-exporter.yml
index 8d5cd2f..3c14291 100644
--- a/dist/rules/host-and-hardware/node-exporter.yml
+++ b/dist/rules/host-and-hardware/node-exporter.yml
@@ -157,6 +157,16 @@ groups:
       summary: Host CPU high iowait (instance {{ $labels.instance }})
       description: "CPU iowait > 5%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
+    # Disk busy with I/O more than 50% of the time (rate > 0.5), sustained for 5m.
+    - alert: HostUnusualDiskIO
+      expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: "Time spent in IO is too high on {{ $labels.instance }}"
+        description: "Disk I/O time > 50% for 5 minutes. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
     - alert: HostContextSwitching
       expr: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000'
       for: 0m