From 7e8bc1a215f0c8dbd17d232499b77afc025a7378 Mon Sep 17 00:00:00 2001 From: michaelact <86778470+michaelact@users.noreply.github.com> Date: Mon, 22 May 2023 03:58:04 +0700 Subject: [PATCH] Add under-utilized container alerts (#322) * chore: add container under-utilized alerts * chore: resolve duplicated query and description --- _data/rules.yml | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index 8211f69..f4230fd 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -347,12 +347,12 @@ groups: for: 5m comments: | This rule can be very noisy in dynamic infra with legitimate container start/stop/deployment. - - name: Container CPU usage - description: Container CPU usage is above 80% + - name: Container High CPU utilization + description: Container CPU utilization is above 80% query: '(sum(rate(container_cpu_usage_seconds_total{name!=""}[3m])) BY (instance, name) * 100) > 80' severity: warning for: 2m - - name: Container Memory usage + - name: Container High Memory usage description: Container Memory usage is above 80% query: '(sum(container_memory_working_set_bytes{name!=""}) BY (instance, name) / sum(container_spec_memory_limit_bytes > 0) BY (instance, name) * 100) > 80' severity: warning @@ -368,6 +368,17 @@ groups: query: 'rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1' severity: warning for: 2m + - name: Container Low CPU utilization + description: Container CPU utilization is under 20% for 1 week. Consider reducing the allocated CPU. + query: '(sum(rate(container_cpu_usage_seconds_total{name!=""}[3m])) BY (instance, name) * 100) < 20' + severity: info + for: 7d + - name: Container Low Memory usage + description: Container Memory usage is under 20% for 1 week. Consider reducing the allocated memory.
+ query: '(sum(container_memory_working_set_bytes{name!=""}) BY (instance, name) / sum(container_spec_memory_limit_bytes > 0) BY (instance, name) * 100) < 20' + severity: info + for: 7d + - name: Blackbox exporters: