From 805f0d369502d18b5a9cac6af12ba371fd06726c Mon Sep 17 00:00:00 2001
From: michaelact <86778470+michaelact@users.noreply.github.com>
Date: Tue, 29 Nov 2022 17:34:20 +0700
Subject: [PATCH] chore: add under-utilized alerts

---
Review notes (this section is ignored when the patch is applied):
- The payload was edited during review (rule comments added, memory-backed
  filesystems excluded from the disk rule), so the abbreviated blob id on the
  "index" line below no longer matches the resulting file content.

 .../node-exporter-under-utilized.yml          | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 dist/rules/host-and-hardware/node-exporter-under-utilized.yml

diff --git a/dist/rules/host-and-hardware/node-exporter-under-utilized.yml b/dist/rules/host-and-hardware/node-exporter-under-utilized.yml
new file mode 100644
index 0000000..e60c12c
--- /dev/null
+++ b/dist/rules/host-and-hardware/node-exporter-under-utilized.yml
@@ -0,0 +1,36 @@
+groups:
+
+- name: NodeExporter
+
+  rules:
+
+    # Available memory has been above 80% of total memory continuously for 7 days.
+    - alert: HostUnderUtilizedOfMemory
+      expr: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 80'
+      for: 7d
+      labels:
+        severity: info
+      annotations:
+        summary: Host memory is under utilized (instance {{ $labels.instance }})
+        description: "Node memory is not fully used (> 80% free). Consider reducing memory space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    # Non-idle CPU share (2m rate, averaged per instance) has been below 20% continuously for 7 days.
+    - alert: HostUnderUtilizedCpuLoad
+      expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20'
+      for: 7d
+      labels:
+        severity: info
+      annotations:
+        summary: Host CPU load is under utilized (instance {{ $labels.instance }})
+        description: "CPU load is < 20%. Consider reducing the number of CPUs.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+    # A writable filesystem has had more than 80% of its space available continuously for 7 days.
+    # tmpfs/ramfs are excluded: they are memory-backed and usually almost empty, so they would always match.
+    - alert: HostLotsOfFreeDiskSpace
+      expr: '(node_filesystem_avail_bytes{fstype!~"tmpfs|ramfs"} * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
+      for: 7d
+      labels:
+        severity: info
+      annotations:
+        summary: Host lots of free disk space (instance {{ $labels.instance }})
+        description: "Disk space is not fully used (> 80% free). Consider reducing disk space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"