From 9f5c641bddde827d25c0284134c16a5ac3b94503 Mon Sep 17 00:00:00 2001 From: samber Date: Wed, 23 Apr 2025 08:31:10 +0000 Subject: [PATCH] Publish --- _data/.#rules.yml | 1 - _data/rules.yml | 9 +++++++++ dist/rules/kubernetes/kubestate-exporter.yml | 9 +++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) delete mode 120000 _data/.#rules.yml diff --git a/_data/.#rules.yml b/_data/.#rules.yml deleted file mode 120000 index 812d0ec..0000000 --- a/_data/.#rules.yml +++ /dev/null @@ -1 +0,0 @@ -samber@Sambers-MacBook.local.46405 \ No newline at end of file diff --git a/_data/rules.yml b/_data/rules.yml index f647ceb..1e41c92 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -2044,6 +2044,15 @@ groups: query: 'kube_node_status_condition{condition="Ready",status="true"} == 0' severity: critical for: 10m + - name: Kubernetes Node scheduling disabled + summary: Kubernetes node scheduling disabled (node {{ $labels.node }}) + description: Node {{ $labels.node }} has been marked as unschedulable for more than 30 minutes. + query: 'kube_node_spec_taint{key="node.kubernetes.io/unschedulable"} == 1' + severity: warning + for: 30m + comments: | + Nodes with scheduling disabled are fine in themselves (e.g. cordoned for maintenance). + This alert is useful to get warned about nodes that stay unschedulable for longer than expected. 
- name: Kubernetes Node memory pressure summary: Kubernetes memory pressure (node {{ $labels.node }}) description: "Node {{ $labels.node }} has MemoryPressure condition" diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml index 7e32694..b13fbb5 100644 --- a/dist/rules/kubernetes/kubestate-exporter.yml +++ b/dist/rules/kubernetes/kubestate-exporter.yml @@ -13,6 +13,15 @@ groups: summary: Kubernetes Node ready (node {{ $labels.node }}) description: "Node {{ $labels.node }} has been unready for a long time\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: KubernetesNodeSchedulingDisabled + expr: 'kube_node_spec_taint{key="node.kubernetes.io/unschedulable"} == 1' + for: 30m + labels: + severity: warning + annotations: + summary: Kubernetes node scheduling disabled (node {{ $labels.node }}) + description: "Node {{ $labels.node }} has been marked as unschedulable for more than 30 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: KubernetesNodeMemoryPressure expr: 'kube_node_status_condition{condition="MemoryPressure",status="true"} == 1' for: 2m