---
# Alerting rules for Cortex, evaluated against the cortex_* metrics referenced
# in each expression. Every rule attaches a severity label plus summary and
# description annotations rendered from the alert's labels and value.
groups:
  - name: EmbeddedExporter
    rules:
      # Fires immediately when the ruler's last-reload-successful gauge is
      # anything other than 1.
      - alert: CortexRulerConfigurationReloadFailure
        expr: 'cortex_ruler_config_last_reload_successful != 1'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Cortex ruler configuration reload failure (instance {{ $labels.instance }})
          description: "Cortex ruler configuration reload failure (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately when fewer than one Alertmanager is discovered.
      - alert: CortexNotConnectedToAlertmanager
        expr: 'cortex_prometheus_notifications_alertmanagers_discovered < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Cortex not connected to Alertmanager (instance {{ $labels.instance }})
          description: "Cortex not connected to Alertmanager (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold of 0.05/s avoids firing on transient single-event spikes.
      - alert: CortexNotificationsAreBeingDropped
        expr: 'rate(cortex_prometheus_notifications_dropped_total[5m]) > 0.05'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Cortex notifications are being dropped (instance {{ $labels.instance }})
          description: "Cortex notifications are being dropped due to errors (instance {{ $labels.instance }}, {{ $value | humanize }}/s).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold of 0.05/s avoids firing on transient single-event spikes.
      - alert: CortexNotificationErrors
        expr: 'rate(cortex_prometheus_notifications_errors_total[5m]) > 0.05'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Cortex notification errors (instance {{ $labels.instance }})
          description: "Cortex is failing when sending alert notifications (instance {{ $labels.instance }}, {{ $value | humanize }}/s).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately when the ingester ring reports any member in the
      # Unhealthy state.
      - alert: CortexIngesterUnhealthy
        expr: 'cortex_ring_members{state="Unhealthy", name="ingester"} > 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Cortex ingester unhealthy (instance {{ $labels.instance }})
          description: "Cortex has an unhealthy ingester\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the per-job total queue length stays above zero for 5m.
      # The expression aggregates with `sum by (job)`, so `job` is the only
      # label left on the result; the summary therefore references
      # $labels.job rather than $labels.instance, which would render empty.
      - alert: CortexFrontendQueriesStuck
        expr: 'sum by (job) (cortex_query_frontend_queue_length) > 0'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Cortex frontend queries stuck (job {{ $labels.job }})
          description: "There are queued up queries in query-frontend.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"