diff --git a/_data/rules.yml b/_data/rules.yml index b3529fc..ba573ba 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -313,17 +313,17 @@ groups: doc_url: https://github.com/prometheus/node_exporter rules: - name: Host Memory is under utilized - description: 'Node memory is not fully used (> 80% free). Consider reducing memory space.' + description: 'Node memory is not fully used (> 80% free) for 1 week. Consider reducing memory space.' query: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 80' severity: info for: 7d - name: Host Cpu is under utilized - description: 'CPU load is < 20%. Consider reducing the number of CPUs.' + description: 'CPU load is < 20% for 1 week. Consider reducing the number of CPUs.' query: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20' severity: info for: 7d - name: Host lots of free Disk space - description: 'Disk space is not fully used (> 80% free). Consider reducing disk space.' + description: 'Disk space is not fully used (> 80% free) for 1 week. Consider reducing disk space.' query: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' severity: info for: 7d diff --git a/dist/rules/host-and-hardware/node-exporter-under-utilized.yml b/dist/rules/host-and-hardware/node-exporter-under-utilized.yml index e60c12c..1033b49 100644 --- a/dist/rules/host-and-hardware/node-exporter-under-utilized.yml +++ b/dist/rules/host-and-hardware/node-exporter-under-utilized.yml @@ -11,7 +11,7 @@ groups: severity: info annotations: summary: Host memory is under utilized (instance {{ $labels.instance }}) - description: "Node memory is not fully used (> 80% free). Consider reducing memory space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Node memory is not fully used (> 80% free) for 1 week. Consider reducing memory space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: HostUnderUtilizedCpuLoad expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20' @@ -20,7 +20,7 @@ groups: severity: info annotations: summary: Host CPU load is under utilized (instance {{ $labels.instance }}) - description: "CPU load is < 20%. Consider reducing the number of CPUs.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "CPU load is < 20% for 1 week. Consider reducing the number of CPUs.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: HostLotsOfFreeDiskSpace expr: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' @@ -29,4 +29,4 @@ groups: severity: info annotations: summary: Host lots of free disk space (instance {{ $labels.instance }}) - description: "Disk space is not fully used (> 80% free). Consider reducing disk space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Disk space is not fully used (> 80% free) for 1 week. Consider reducing disk space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"