# Source path: awesome-prometheus-alerts/dist/rules/thanos/thanos-sidecar.yml
# Timestamp: 2023-03-15 17:27:02 +00:00
#
# Listing metadata: 23 lines, 1 KiB, YAML

# Prometheus alerting rules for the Thanos sidecar.
# Both rules select series whose `job` label matches `.*thanos-sidecar.*`,
# must hold for 5 minutes before firing, and carry `severity: critical`.
groups:
  - name: ThanosSidecar
    rules:
      # Fires when the per-(job, instance) rate of
      # thanos_objstore_bucket_operation_failures_total, measured over a
      # 5-minute window, is above zero.
      - alert: ThanosSidecarBucketOperationsFailed
        expr: 'sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-sidecar.*"}[5m])) > 0'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: 'Thanos Sidecar Bucket Operations Failed (instance {{ $labels.instance }})'
          description: "Thanos Sidecar {{$labels.instance}} bucket operations are failing\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when thanos_sidecar_prometheus_up is 0 while a
      # prometheus_tsdb_data_replay_duration_seconds series with the same
      # (namespace, pod) labels is non-zero.
      # NOTE(review): the non-zero replay duration is presumably used as the
      # "Prometheus has started" signal implied by the alert name - confirm.
      - alert: ThanosSidecarNoConnectionToStartedPrometheus
        expr: 'thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0 and on (namespace, pod)prometheus_tsdb_data_replay_duration_seconds != 0'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: 'Thanos Sidecar No Connection To Started Prometheus (instance {{ $labels.instance }})'
          description: "Thanos Sidecar {{$labels.instance}} is unhealthy.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"