From 90706282ad429e22996edcd037048b0c385593c2 Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Mon, 11 Mar 2024 22:55:05 +0100 Subject: [PATCH 1/4] Update rules.yml --- _data/rules.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index 42ff8f0..4a879d5 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -401,9 +401,9 @@ groups: for: 2m - name: Container high throttle rate description: Container is being throttled - query: "rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1" + query: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace) > ( 75 / 100 )' severity: warning - for: 2m + for: 5m - name: Container Low CPU utilization description: Container CPU utilization is under 20% for 1 week. Consider reducing the allocated CPU. query: '(sum(rate(container_cpu_usage_seconds_total{container!=""}[5m])) by (pod, container) / sum(container_spec_cpu_quota{container!=""}/container_spec_cpu_period{container!=""}) by (pod, container) * 100) < 20' From 7b3cef8bf9c4f5902d02e29ba226d3b6436d1dcb Mon Sep 17 00:00:00 2001 From: samber Date: Mon, 11 Mar 2024 21:56:16 +0000 Subject: [PATCH 2/4] Publish --- dist/rules/docker-containers/google-cadvisor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/rules/docker-containers/google-cadvisor.yml b/dist/rules/docker-containers/google-cadvisor.yml index 7c7bcb7..b495569 100644 --- a/dist/rules/docker-containers/google-cadvisor.yml +++ b/dist/rules/docker-containers/google-cadvisor.yml @@ -50,8 +50,8 @@ groups: description: "Container Volume usage is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: ContainerHighThrottleRate - expr: 'rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1' - for: 2m + expr: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace) > ( 75 / 100 )' + for: 5m labels: severity: warning annotations: From 1eb5c5834fcb92116da32b460a1a7bbf15f4b77e Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Mon, 11 Mar 2024 23:28:06 +0100 Subject: [PATCH 3/4] Update rules.yml --- _data/rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_data/rules.yml b/_data/rules.yml index 4a879d5..9f430eb 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -401,7 +401,7 @@ groups: for: 2m - name: Container high throttle rate description: Container is being throttled - query: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace) > ( 75 / 100 )' + query: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace) > ( 25 / 100 )' severity: warning for: 5m - name: Container Low CPU utilization From 693c9e51b2260fd1f11622430957b4da285fa0ae Mon Sep 17 00:00:00 2001 From: samber Date: Mon, 11 Mar 2024 22:29:17 +0000 Subject: [PATCH 4/4] Publish --- dist/rules/docker-containers/google-cadvisor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/rules/docker-containers/google-cadvisor.yml b/dist/rules/docker-containers/google-cadvisor.yml index b495569..cfbc333 100644 --- a/dist/rules/docker-containers/google-cadvisor.yml +++ b/dist/rules/docker-containers/google-cadvisor.yml @@ -50,7 +50,7 @@ groups: description: "Container Volume usage is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: ContainerHighThrottleRate - expr: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace) > ( 75 / 100 )' + expr: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace) > ( 25 / 100 )' for: 5m labels: severity: warning