Add the "Kubernetes Node scheduling disabled" rule to _data/rules.yml using
that file's own entry schema (name/summary/description/query/severity/for),
and remove the hand-written KubernetesNodeSchedulingDisabled alert from
dist/rules/kubernetes/kubestate-exporter.yml.

NOTE(review): the leading whitespace of every hunk line below was reconstructed
from a whitespace-collapsed copy of this patch; confirm the indentation (and
the blank context line in the second hunk) against both target files before
applying.

diff --git a/_data/rules.yml b/_data/rules.yml
index 3f6f178..71d2741 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2044,6 +2044,14 @@ groups:
                 query: 'kube_node_status_condition{condition="Ready",status="true"} == 0'
                 severity: critical
                 for: 10m
+              # A Kubernetes node with scheduling disabled is fine in itself.
+              # This alert warns when a node stays unschedulable for a long time.
+              - name: Kubernetes Node scheduling disabled
+                summary: "Kubernetes Node Scheduling Disabled (node: {{ $labels.node }})"
+                description: "Node {{ $labels.node }} has been marked as unschedulable for more than 30 minutes."
+                query: 'kube_node_spec_taint{key="node.kubernetes.io/unschedulable"} == 1'
+                severity: warning
+                for: 30m
               - name: Kubernetes Node memory pressure
                 summary: Kubernetes memory pressure (node {{ $labels.node }})
                 description: "Node {{ $labels.node }} has MemoryPressure condition"
diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml
index 744fe7a..7e32694 100644
--- a/dist/rules/kubernetes/kubestate-exporter.yml
+++ b/dist/rules/kubernetes/kubestate-exporter.yml
@@ -22,17 +22,6 @@ groups:
         summary: Kubernetes memory pressure (node {{ $labels.node }})
         description: "Node {{ $labels.node }} has MemoryPressure condition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
-    # Kubernetes Node with disabled schedules are fine.
-    # This alarm can be useful to get warned if there are nodes which are longer unscheduled
-    - alert: KubernetesNodeSchedulingDisabled
-      expr: kube_node_spec_taint{key="node.kubernetes.io/unschedulable"} == 1
-      for: 30m
-      labels:
-        severity: warning
-      annotations:
-        summary: "Kubernetes Node Scheduling Disabled (node: {{ $labels.node }})"
-        description: "Node {{ $labels.node }} has been marked as unschedulable for more than 30 minutes."
-
     - alert: KubernetesNodeDiskPressure
       expr: 'kube_node_status_condition{condition="DiskPressure",status="true"} == 1'
       for: 2m