disk io ops alarm

This commit is contained in:
marian.ciotlos@yahoo.com 2023-02-06 12:24:56 +02:00
parent 7e53aa2edd
commit 8559511a47

View file

@ -157,6 +157,15 @@ groups:
summary: Host CPU high iowait (instance {{ $labels.instance }}) summary: Host CPU high iowait (instance {{ $labels.instance }})
description: "CPU iowait > 5%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "CPU iowait > 5%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# Fires when a disk has spent more than 90% of wall-clock time doing I/O,
# sustained for 5 minutes.
# node_disk_io_time_seconds_total is a cumulative counter of seconds spent
# doing I/O, so it has to be wrapped in rate() to obtain a busy fraction
# (0..1). Comparing the raw counter to 1 would practically never match.
- alert: HostUnusualDiskIO
  expr: 'rate(node_disk_io_time_seconds_total[5m]) > 0.9'
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Time spent in IO is too high on {{ $labels.instance }}"
    description: "Disk I/O time above 90% for 5 minutes. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostContextSwitching - alert: HostContextSwitching
expr: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000' expr: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000'
for: 0m for: 0m