fix: added NodeNetworkUnavailable alerts, rm unused OOD alert (#318)

This commit is contained in:
Valery Voronov 2022-10-31 17:47:27 +03:00 committed by GitHub
parent 670787ae34
commit 1e46eacbe7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 7 deletions

View file

@@ -1631,9 +1631,9 @@ groups:
query: 'kube_node_status_condition{condition="DiskPressure",status="true"} == 1'
severity: critical
for: 2m
-- name: Kubernetes out of disk
-description: "{{ $labels.node }} has OutOfDisk condition"
-query: 'kube_node_status_condition{condition="OutOfDisk",status="true"} == 1'
+- name: Kubernetes network unavailable
+description: "{{ $labels.node }} has NetworkUnavailable condition"
+query: 'kube_node_status_condition{condition="NetworkUnavailable",status="true"} == 1'
severity: critical
for: 2m
- name: Kubernetes out of capacity

View file

@@ -31,14 +31,14 @@ groups:
summary: Kubernetes disk pressure (instance {{ $labels.instance }})
description: "{{ $labels.node }} has DiskPressure condition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
-- alert: KubernetesOutOfDisk
-expr: 'kube_node_status_condition{condition="OutOfDisk",status="true"} == 1'
+- alert: KubernetesNetworkUnavailable
+expr: 'kube_node_status_condition{condition="NetworkUnavailable",status="true"} == 1'
for: 2m
labels:
severity: critical
annotations:
-summary: Kubernetes out of disk (instance {{ $labels.instance }})
-description: "{{ $labels.node }} has OutOfDisk condition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+summary: Kubernetes network unavailable (instance {{ $labels.instance }})
+description: "{{ $labels.node }} has NetworkUnavailable condition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: KubernetesOutOfCapacity
expr: 'sum by (node) ((kube_pod_status_phase{phase="Running"} == 1) + on(uid) group_left(node) (0 * kube_pod_info{pod_template_hash=""})) / sum by (node) (kube_node_status_allocatable{resource="pods"}) * 100 > 90'