Update rule for host memory underutilization to use avg_over_time instead of rate, since node_memory_MemAvailable_bytes is a gauge (#400)

This commit is contained in:
Brett Beutell 2024-01-26 04:09:35 +01:00 committed by GitHub
parent df4016bf6a
commit 56a7e0d03a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -23,7 +23,7 @@ groups:
description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostMemoryIsUnderutilized
expr: '(100 - (rate(node_memory_MemAvailable_bytes[30m]) / node_memory_MemTotal_bytes * 100) < 20) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
expr: '(100 - (avg_over_time(node_memory_MemAvailable_bytes[30m]) / node_memory_MemTotal_bytes * 100) < 20) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
for: 1w
labels:
severity: info