Update rules.yml

This commit is contained in:
Samuel Berthe 2024-06-14 20:15:44 +02:00 committed by GitHub
parent 1ee046b739
commit ca4fb01c6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -240,12 +240,15 @@ groups:
query: '(rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
severity: warning
for: 5m
# Earlier form of the context-switching rule: the 5m context-switch rate divided by
# the count of idle-mode node_cpu_seconds_total series (aggregated without cpu, mode),
# compared against a fixed threshold of 10000, then joined on instance with
# node_uname_info to carry the nodename label.
# NOTE(review): the hunk header's line counts (12 old / 15 new) suggest these three
# lines are the removed side of the diff, with the +/- markers lost — confirm.
- name: Host context switching
description: Context switching is growing on the node (> 10000 / CPU / s)
query: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
# Relative context-switching rule: divides the 15m context-switch rate by the 1d
# context-switch rate and matches when that ratio is above 2.
# Each rate is first divided by the count of idle-mode node_cpu_seconds_total series
# (aggregated without mode, cpu); the same divisor appears on both sides of the ratio.
# This query has no node_uname_info join, unlike the other queries in this hunk.
# NOTE(review): "10000 context switches is an arbitrary number." under `comments`
# refers to a threshold this query does not use; it looks like the removed side of
# the diff (hunk header: 12 old / 15 new lines) — confirm it is absent from the file.
- name: Host context switching high
description: Context switching is growing on the node (twice the daily average during the last 15m)
query: |
(rate(node_context_switches_total[15m])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"}))
/
(rate(node_context_switches_total[1d])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) > 2
severity: warning
comments: |
10000 context switches is an arbitrary number.
x2 context switches is an arbitrary number.
The alert threshold depends on the nature of the application.
Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58
- name: Host swap is filling up