mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 00:47:18 +08:00
Merge branch 'master' of github.com:samber/awesome-prometheus-alerts
This commit is contained in:
commit
a381fb5e22
2 changed files with 4 additions and 3 deletions
|
|
@@ -1692,8 +1692,9 @@ groups:
|
|||
for: 2m
|
||||
- name: Kubernetes Pod not healthy
|
||||
description: Pod has been in a non-ready state for longer than 15 minutes.
|
||||
query: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:1m]) > 0'
|
||||
query: 'sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"}) > 0'
|
||||
severity: critical
|
||||
for: 15m
|
||||
- name: Kubernetes pod crash looping
|
||||
description: Pod {{ $labels.pod }} is crash looping
|
||||
query: 'increase(kube_pod_container_status_restarts_total[1m]) > 3'
|
||||
|
|
|
|||
4
dist/rules/kubernetes/kubestate-exporter.yml
vendored
4
dist/rules/kubernetes/kubestate-exporter.yml
vendored
|
|
@@ -149,8 +149,8 @@ groups:
|
|||
description: "The maximum number of desired Pods has been hit\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: KubernetesPodNotHealthy
|
||||
expr: 'min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[15m:1m]) > 0'
|
||||
for: 0m
|
||||
expr: 'sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"}) > 0'
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
|
|
|
|||
Loading…
Reference in a new issue