From 28b1cc7dd7568e89df38ea040769b8eca0897a9a Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Mon, 16 Mar 2026 14:19:28 +0100 Subject: [PATCH] fix: address PR review comments on Tempo/Mimir rules - Fix Tempo no tenant index builders: add on() so the `and` matches across differing label sets - Fix Tempo block list rising: output percentage instead of ratio - Fix Mimir memory map areas: multiply by 100 to match % description - Fix all instance limit rules: multiply by 100 to match % descriptions - Fix instance limit rules: use `and ignoring(limit)` so the limit > 0 guard matches the ratio, which carries no limit label - Fix distributor inflight requests: add % to description --- _data/rules.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index fa08061..c4cc1b7 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -4187,7 +4187,7 @@ groups: severity: critical - name: Tempo no tenant index builders description: No tenant index builders for tenant {{ $labels.tenant }}. Tenant index will quickly become stale. - query: sum by (tenant) (tempodb_blocklist_tenant_index_builder) == 0 and max(tempodb_blocklist_length) > 0 + query: sum by (tenant) (tempodb_blocklist_tenant_index_builder) == 0 and on() max(tempodb_blocklist_length) > 0 severity: critical for: 5m - name: Tempo tenant index too old @@ -4199,7 +4199,7 @@ groups: Threshold of 600s (10 minutes). Adjust based on your tenant index build interval. - name: Tempo block list rising quickly description: Tempo blocklist length is up {{ printf "%.0f" $value }}% over the last 7 days. Consider scaling compactors. - query: avg(tempodb_blocklist_length) / avg(tempodb_blocklist_length offset 7d) > 1.4 + query: (avg(tempodb_blocklist_length) / avg(tempodb_blocklist_length offset 7d) - 1) * 100 > 40 severity: critical for: 15m comments: | @@ -4299,7 +4299,7 @@ groups: for: 5m - name: Mimir memory map areas too high description: 'Mimir {{ $labels.job }} is using {{ printf "%.0f" $value }}% of its memory map area limit.' 
- query: 'process_memory_map_areas{job=~".*(ingester|cortex|mimir|store-gateway).*"} / process_memory_map_areas_limit{job=~".*(ingester|cortex|mimir|store-gateway).*"} > 0.8' + query: 'process_memory_map_areas{job=~".*(ingester|cortex|mimir|store-gateway).*"} / process_memory_map_areas_limit{job=~".*(ingester|cortex|mimir|store-gateway).*"} * 100 > 80' severity: critical for: 5m - name: Mimir ingester instance has no tenants @@ -4330,32 +4330,32 @@ groups: # Instance limits - name: Mimir ingester reaching series limit warning description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its series limit.' - query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} > 0.8) and cortex_ingester_instance_limits{limit="max_series"} > 0' + query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} * 100 > 80) and ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} > 0' severity: warning for: 3h - name: Mimir ingester reaching series limit critical description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its series limit.' - query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} > 0.9) and cortex_ingester_instance_limits{limit="max_series"} > 0' + query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} * 100 > 90) and ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} > 0' severity: critical for: 5m - name: Mimir ingester reaching tenants limit warning description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its tenants limit.' 
- query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} > 0.7) and cortex_ingester_instance_limits{limit="max_tenants"} > 0' + query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} * 100 > 70) and ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} > 0' severity: warning for: 5m - name: Mimir ingester reaching tenants limit critical description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its tenants limit.' - query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} > 0.8) and cortex_ingester_instance_limits{limit="max_tenants"} > 0' + query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} * 100 > 80) and ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} > 0' severity: critical for: 5m - name: Mimir reaching TCP connections limit description: 'Mimir instance {{ $labels.instance }} is using {{ printf "%.0f" $value }}% of its TCP connections limit.' - query: cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and cortex_tcp_connections_limit > 0 + query: cortex_tcp_connections / cortex_tcp_connections_limit * 100 > 80 and cortex_tcp_connections_limit > 0 severity: critical for: 5m - name: Mimir distributor inflight requests high - description: 'Mimir distributor {{ $labels.instance }} has too many inflight push requests.' - query: '(cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0.8) and cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0' + description: 'Mimir distributor {{ $labels.instance }} is using {{ printf "%.0f" $value }}% of its inflight push requests limit.' 
+ query: '(cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"} * 100 > 80) and ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0' severity: critical for: 5m # Blocks and TSDB