# awesome-prometheus-alerts/dist/rules/google-cloud-stackdriver/stackdriver-exporter.yml
# 2026-03-16 13:07:45 +00:00
#
# 53 lines
# 2.5 KiB
# YAML

groups:
  - name: StackdriverExporter
    # Self-monitoring metrics use the stackdriver_monitoring_* prefix.
    # All self-monitoring metrics include a project_id label.
    #
    # Every rule in this group carries severity "warning". Rules with
    # "for: 0m" have no pending period: they fire on the first evaluation in
    # which the expression returns a result.
    rules:
      # The last-scrape error gauge has stayed above 0 for 5 minutes.
      - alert: StackdriverExporterScrapeError
        expr: 'stackdriver_monitoring_last_scrape_error > 0'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Stackdriver exporter scrape error (instance {{ $labels.instance }})'
          description: "Stackdriver exporter failed to scrape metrics from Google Cloud Monitoring API for project {{ $labels.project_id }}.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # The last scrape duration has stayed above 300 seconds (5 minutes)
      # for 5 minutes.
      - alert: StackdriverExporterSlowScrape
        expr: 'stackdriver_monitoring_last_scrape_duration_seconds > 300'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Stackdriver exporter slow scrape (instance {{ $labels.instance }})'
          description: "Stackdriver exporter scrape for project {{ $labels.project_id }} is taking more than 5 minutes ({{ $value }}s).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # The scrape error counter grew by more than 5 over the trailing
      # 15-minute window.
      - alert: StackdriverExporterScrapeErrorsIncreasing
        expr: 'increase(stackdriver_monitoring_scrape_errors_total[15m]) > 5'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Stackdriver exporter scrape errors increasing (instance {{ $labels.instance }})'
          description: "Stackdriver exporter has had {{ $value }} scrape errors in the last 15 minutes for project {{ $labels.project_id }}.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # rate() yields calls per second averaged over 5 minutes; "* 60"
      # converts that to calls per minute before comparing against 100.
      - alert: StackdriverExporterHighApiCalls
        expr: 'rate(stackdriver_monitoring_api_calls_total[5m]) * 60 > 100'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Stackdriver exporter high API calls (instance {{ $labels.instance }})'
          description: "Stackdriver exporter is making {{ $value }} API calls per minute for project {{ $labels.project_id }}. This may hit Google Cloud Monitoring API quotas.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # The last-scrape timestamp is more than 600 seconds (10 minutes)
      # behind the evaluation time.
      # NOTE(review): the description says "successfully", but the expression
      # only checks the age of the timestamp -- confirm whether the exporter
      # updates this gauge on failed scrapes as well.
      - alert: StackdriverExporterScrapeStale
        expr: 'time() - stackdriver_monitoring_last_scrape_timestamp > 600'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Stackdriver exporter scrape stale (instance {{ $labels.instance }})'
          description: "Stackdriver exporter has not successfully scraped metrics for project {{ $labels.project_id }} in the last 10 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"