Update rules.yml

2026-06-23 18:06:58 +08:00 · 2023-06-22 18:40:33 +02:00 · 2023-06-22 18:40:33 +02:00 · 47b7748618
commit 47b7748618
parent a4dbefd853
1 changed file with 5 additions and 5 deletions
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -237,12 +237,12 @@ groups:
                severity: warning
                for: 5m
              - name: Host context switching
-                description: Context switching is growing on node (> 1000 / s)
+                description: Context switching is growing on the node (> 10000 / s)
-                query: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+                query: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
                severity: warning
                comments: |
-                  1000 context switches is an arbitrary number.
+                  10000 context switches is an arbitrary number.
-                  Alert threshold depends on nature of application.
+                  The alert threshold depends on the nature of the application.
                  Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58
              - name: Host swap is filling up
                description: Swap is filling up (>80%)
@@ -263,7 +263,7 @@ groups:
                query: '(node_hwmon_temp_crit_alarm_celsius == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
                severity: critical
              - name: Host RAID array got inactive
-                description: 'RAID array {{ $labels.device }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.'
+                description: 'RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.'
                query: '(node_md_state{state="inactive"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
                severity: critical
              - name: Host RAID disk failure