Container high throttle rate: alert on the share of throttled CFS periods.

Replaces the seconds-based check (rate of
container_cpu_cfs_throttled_seconds_total over 3m > 1) with the ratio of
throttled CFS periods to total CFS periods over 5m, aggregated by
(container, pod, namespace). The alert fires when that ratio stays above
25 / 100 for 5m.

Both the numerator and the denominator exclude series whose container label
is empty, matching the selectors used by the neighbouring "Container Low CPU
utilization" rule. The same expression is used in _data/rules.yml and in
dist/rules/docker-containers/google-cadvisor.yml.

NOTE(review): context-line indentation was reconstructed from a
whitespace-collapsed copy of this patch; confirm it against the target files
before applying. The index blob hashes below predate the denominator-selector
edit and are informational only.

diff --git a/_data/rules.yml b/_data/rules.yml
index 8192b87..4b86162 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -408,9 +408,9 @@ groups:
                 for: 2m
               - name: Container high throttle rate
                 description: Container is being throttled
-                query: "rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1"
+                query: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total{container!=""}[5m])) by (container, pod, namespace) > ( 25 / 100 )'
                 severity: warning
-                for: 2m
+                for: 5m
               - name: Container Low CPU utilization
                 description: Container CPU utilization is under 20% for 1 week. Consider reducing the allocated CPU.
                 query: '(sum(rate(container_cpu_usage_seconds_total{container!=""}[5m])) by (pod, container) / sum(container_spec_cpu_quota{container!=""}/container_spec_cpu_period{container!=""}) by (pod, container) * 100) < 20'
diff --git a/dist/rules/docker-containers/google-cadvisor.yml b/dist/rules/docker-containers/google-cadvisor.yml
index 7c7bcb7..cfbc333 100644
--- a/dist/rules/docker-containers/google-cadvisor.yml
+++ b/dist/rules/docker-containers/google-cadvisor.yml
@@ -50,8 +50,8 @@ groups:
         description: "Container Volume usage is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
 
     - alert: ContainerHighThrottleRate
-      expr: 'rate(container_cpu_cfs_throttled_seconds_total[3m]) > 1'
-      for: 2m
+      expr: 'sum(increase(container_cpu_cfs_throttled_periods_total{container!=""}[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total{container!=""}[5m])) by (container, pod, namespace) > ( 25 / 100 )'
+      for: 5m
       labels:
         severity: warning
       annotations: