chore: add alert consideration times

This commit is contained in:
michaelact 2022-11-29 17:37:17 +07:00
parent 0c7945b736
commit cd0be4e687
No known key found for this signature in database
GPG key ID: 4E42935A4F2BEA14
2 changed files with 6 additions and 6 deletions

View file

@ -313,17 +313,17 @@ groups:
doc_url: https://github.com/prometheus/node_exporter doc_url: https://github.com/prometheus/node_exporter
rules: rules:
- name: Host Memory is under utilized - name: Host Memory is under utilized
description: 'Node memory is not fully used (> 80% free). Consider reducing memory space.' description: 'Node memory is not fully used (> 80% free) for 1 week. Consider reducing memory space.'
query: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 80' query: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 80'
severity: info severity: info
for: 7d for: 7d
- name: Host Cpu is under utilized - name: Host Cpu is under utilized
description: 'CPU load is < 20%. Consider reducing the number of CPUs.' description: 'CPU load is < 20% for 1 week. Consider reducing the number of CPUs.'
query: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20' query: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20'
severity: info severity: info
for: 7d for: 7d
- name: Host lots of free Disk space - name: Host lots of free Disk space
description: 'Disk space is not fully used (> 80% free). Consider reducing disk space.' description: 'Disk space is not fully used (> 80% free) for 1 week. Consider reducing disk space.'
query: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' query: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
severity: info severity: info
for: 7d for: 7d

View file

@ -11,7 +11,7 @@ groups:
severity: info severity: info
annotations: annotations:
summary: Host memory is under utilized (instance {{ $labels.instance }}) summary: Host memory is under utilized (instance {{ $labels.instance }})
description: "Node memory is not fully used (> 80% free). Consider reducing memory space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "Node memory is not fully used (> 80% free) for 1 week. Consider reducing memory space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostUnderUtilizedCpuLoad - alert: HostUnderUtilizedCpuLoad
expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20' expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) < 20'
@ -20,7 +20,7 @@ groups:
severity: info severity: info
annotations: annotations:
summary: Host CPU load is under utilized (instance {{ $labels.instance }}) summary: Host CPU load is under utilized (instance {{ $labels.instance }})
description: "CPU load is < 20%. Consider reducing the number of CPUs.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "CPU load is < 20% for 1 week. Consider reducing the number of CPUs.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostLotsOfFreeDiskSpace - alert: HostLotsOfFreeDiskSpace
expr: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' expr: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes > 80 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
@ -29,4 +29,4 @@ groups:
severity: info severity: info
annotations: annotations:
summary: Host lots of free disk space (instance {{ $labels.instance }}) summary: Host lots of free disk space (instance {{ $labels.instance }})
description: "Disk space is not fully used (> 80% free). Consider reducing disk space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "Disk space is not fully used (> 80% free) for 1 week. Consider reducing disk space.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"