fix: address PR review comments on Tempo/Mimir rules

- Fix Tempo no tenant index builders: add on() to the cross-label-set `and` operator
- Fix Tempo block list rising: output percentage instead of ratio
- Fix Mimir memory map areas: multiply by 100 to match % description
- Fix all instance limit rules: multiply by 100 to match % descriptions
- Fix distributor inflight requests: add % to description
This commit is contained in:
Samuel Berthe 2026-03-16 14:19:28 +01:00
parent a65c0e43c2
commit 28b1cc7dd7

View file

@@ -4187,7 +4187,7 @@ groups:
severity: critical severity: critical
- name: Tempo no tenant index builders - name: Tempo no tenant index builders
description: No tenant index builders for tenant {{ $labels.tenant }}. Tenant index will quickly become stale. description: No tenant index builders for tenant {{ $labels.tenant }}. Tenant index will quickly become stale.
query: sum by (tenant) (tempodb_blocklist_tenant_index_builder) == 0 and max(tempodb_blocklist_length) > 0 query: sum by (tenant) (tempodb_blocklist_tenant_index_builder) == 0 and on() max(tempodb_blocklist_length) > 0
severity: critical severity: critical
for: 5m for: 5m
- name: Tempo tenant index too old - name: Tempo tenant index too old
@@ -4199,7 +4199,7 @@ groups:
Threshold of 600s (10 minutes). Adjust based on your tenant index build interval. Threshold of 600s (10 minutes). Adjust based on your tenant index build interval.
- name: Tempo block list rising quickly - name: Tempo block list rising quickly
description: Tempo blocklist length is up {{ printf "%.0f" $value }}% over the last 7 days. Consider scaling compactors. description: Tempo blocklist length is up {{ printf "%.0f" $value }}% over the last 7 days. Consider scaling compactors.
query: avg(tempodb_blocklist_length) / avg(tempodb_blocklist_length offset 7d) > 1.4 query: (avg(tempodb_blocklist_length) / avg(tempodb_blocklist_length offset 7d) - 1) * 100 > 40
severity: critical severity: critical
for: 15m for: 15m
comments: | comments: |
@@ -4299,7 +4299,7 @@ groups:
for: 5m for: 5m
- name: Mimir memory map areas too high - name: Mimir memory map areas too high
description: 'Mimir {{ $labels.job }} is using {{ printf "%.0f" $value }}% of its memory map area limit.' description: 'Mimir {{ $labels.job }} is using {{ printf "%.0f" $value }}% of its memory map area limit.'
query: 'process_memory_map_areas{job=~".*(ingester|cortex|mimir|store-gateway).*"} / process_memory_map_areas_limit{job=~".*(ingester|cortex|mimir|store-gateway).*"} > 0.8' query: 'process_memory_map_areas{job=~".*(ingester|cortex|mimir|store-gateway).*"} / process_memory_map_areas_limit{job=~".*(ingester|cortex|mimir|store-gateway).*"} * 100 > 80'
severity: critical severity: critical
for: 5m for: 5m
- name: Mimir ingester instance has no tenants - name: Mimir ingester instance has no tenants
@@ -4330,32 +4330,32 @@ groups:
# Instance limits # Instance limits
- name: Mimir ingester reaching series limit warning - name: Mimir ingester reaching series limit warning
description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its series limit.' description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its series limit.'
query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} > 0.8) and cortex_ingester_instance_limits{limit="max_series"} > 0' query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} * 100 > 80) and cortex_ingester_instance_limits{limit="max_series"} > 0'
severity: warning severity: warning
for: 3h for: 3h
- name: Mimir ingester reaching series limit critical - name: Mimir ingester reaching series limit critical
description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its series limit.' description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its series limit.'
query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} > 0.9) and cortex_ingester_instance_limits{limit="max_series"} > 0' query: '(cortex_ingester_memory_series / ignoring(limit) cortex_ingester_instance_limits{limit="max_series"} * 100 > 90) and cortex_ingester_instance_limits{limit="max_series"} > 0'
severity: critical severity: critical
for: 5m for: 5m
- name: Mimir ingester reaching tenants limit warning - name: Mimir ingester reaching tenants limit warning
description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its tenants limit.' description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its tenants limit.'
query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} > 0.7) and cortex_ingester_instance_limits{limit="max_tenants"} > 0' query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} * 100 > 70) and cortex_ingester_instance_limits{limit="max_tenants"} > 0'
severity: warning severity: warning
for: 5m for: 5m
- name: Mimir ingester reaching tenants limit critical - name: Mimir ingester reaching tenants limit critical
description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its tenants limit.' description: 'Mimir ingester {{ $labels.instance }} has reached {{ printf "%.0f" $value }}% of its tenants limit.'
query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} > 0.8) and cortex_ingester_instance_limits{limit="max_tenants"} > 0' query: '(cortex_ingester_memory_users / ignoring(limit) cortex_ingester_instance_limits{limit="max_tenants"} * 100 > 80) and cortex_ingester_instance_limits{limit="max_tenants"} > 0'
severity: critical severity: critical
for: 5m for: 5m
- name: Mimir reaching TCP connections limit - name: Mimir reaching TCP connections limit
description: 'Mimir instance {{ $labels.instance }} is using {{ printf "%.0f" $value }}% of its TCP connections limit.' description: 'Mimir instance {{ $labels.instance }} is using {{ printf "%.0f" $value }}% of its TCP connections limit.'
query: cortex_tcp_connections / cortex_tcp_connections_limit > 0.8 and cortex_tcp_connections_limit > 0 query: cortex_tcp_connections / cortex_tcp_connections_limit * 100 > 80 and cortex_tcp_connections_limit > 0
severity: critical severity: critical
for: 5m for: 5m
- name: Mimir distributor inflight requests high - name: Mimir distributor inflight requests high
description: 'Mimir distributor {{ $labels.instance }} has too many inflight push requests.' description: 'Mimir distributor {{ $labels.instance }} is using {{ printf "%.0f" $value }}% of its inflight push requests limit.'
query: '(cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0.8) and cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0' query: '(cortex_distributor_inflight_push_requests / ignoring(limit) cortex_distributor_instance_limits{limit="max_inflight_push_requests"} * 100 > 80) and cortex_distributor_instance_limits{limit="max_inflight_push_requests"} > 0'
severity: critical severity: critical
for: 5m for: 5m
# Blocks and TSDB # Blocks and TSDB