mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 17:07:24 +08:00
feat(data): adding k8s rule - detect container killed by oomkiller
This commit is contained in:
parent
18672ff0f9
commit
1c44cd7818
1 changed file with 4 additions and 0 deletions
|
|
@@ -1501,6 +1501,10 @@ groups:
|
|||
query: 'sum by (node) ((kube_pod_status_phase{phase="Running"} == 1) + on(pod, namespace) group_left(node) (0 * kube_pod_info)) / sum(kube_node_status_allocatable_pods) by (node) * 100 > 90'
|
||||
severity: warning
|
||||
for: 2m
|
||||
- name: Kubernetes container oom killer
|
||||
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes."
|
||||
query: '(kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1'
|
||||
severity: warning
|
||||
- name: Kubernetes Job failed
|
||||
description: "Job {{$labels.namespace}}/{{$labels.exported_job}} failed to complete"
|
||||
query: 'kube_job_status_failed > 0'
|
||||
|
|
|
|||
Loading…
Reference in a new issue