From 8460f9008e1eb191bb62d445ea17709698ce63db Mon Sep 17 00:00:00 2001
From: "R.Sicart"
Date: Tue, 14 May 2024 20:34:43 +0200
Subject: [PATCH] fix: some kube api alert lint (#416)

* fix: apiserver regexp matchers are automatically fully anchored

Signed-off-by: R.Sicart

* fix: apiserver errors alert is using label but the query removes it

Signed-off-by: R.Sicart

* fix: apiserver latency alert is using label but the query removes it

Signed-off-by: R.Sicart

---------

Signed-off-by: R.Sicart
---
NOTE(review): besides un-anchoring the verb regexp, the latency hunk also
drops the subresource!="log" matcher and changes the aggregation from
WITHOUT (instance, resource) to WITHOUT (subresource). The commit message
does not mention the dropped matcher -- confirm that is intended.

 _data/rules.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 61425a5..eb87723 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2071,7 +2071,7 @@ groups:
                 for: 12h
               - name: Kubernetes API server errors
                 description: Kubernetes API server is experiencing high error rate
-                query: 'sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[1m])) / sum(rate(apiserver_request_total{job="apiserver"}[1m])) * 100 > 3'
+                query: 'sum(rate(apiserver_request_total{job="apiserver",code=~"(?:5..)"}[1m])) by (instance, job) / sum(rate(apiserver_request_total{job="apiserver"}[1m])) by (instance, job) * 100 > 3'
                 severity: critical
                 for: 2m
               - name: Kubernetes API client errors
@@ -2089,7 +2089,7 @@ groups:
                 severity: critical
               - name: Kubernetes API server latency
                 description: "Kubernetes API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}."
-                query: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{subresource!="log",verb!~"^(?:CONNECT|WATCHLIST|WATCH|PROXY)$"} [10m])) WITHOUT (instance, resource)) > 1'
+                query: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~"(?:CONNECT|WATCHLIST|WATCH|PROXY)"} [10m])) WITHOUT (subresource)) > 1'
                 severity: warning
                 for: 2m
 