From b47359c2fd0c9f25f07d4cd6eb882308c3feb98a Mon Sep 17 00:00:00 2001
From: Andre Martins <58525821+apmartins85@users.noreply.github.com>
Date: Thu, 19 Aug 2021 15:31:46 -0300
Subject: [PATCH] added alerts to cortex (#240)

* added alerts to cortex

* Update rules.yml

Co-authored-by: apmbktf
Co-authored-by: Samuel Berthe
---
Note (review): this patch was recovered from a copy that had been collapsed
onto a single line. The line breaks follow the format-patch layout and the
hunk header counts (3 context lines + 9 additions). The YAML indentation in
the hunk below is reconstructed and presumably matches _data/rules.yml;
confirm against the target file before applying.

 _data/rules.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 0d2c6fb..3123901 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2027,3 +2027,12 @@ groups:
                 description: Cortex is failing when sengin alert notifications (instance {{ $labels.instance }})
                 query: rate(cortex_prometheus_notifications_errors_total[5m]) > 0
                 severity: critical
+              - name: Cortex ingester unhealthy
+                description: Cortex has an unhealthy ingester
+                query: cortex_ring_members{state="Unhealthy", name="ingester"} > 0
+                severity: critical
+              - name: Cortex frontend queries stuck
+                description: There are queued up queries in query-frontend.
+                query: sum by (job) (cortex_query_frontend_queue_length) > 0
+                severity: critical
+                for: 5m