mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-20 16:46:37 +08:00
Publish
This commit is contained in:
parent
4c9da9ed24
commit
43427987af
1 changed files with 36 additions and 0 deletions
36
dist/rules/litellm/embedded-exporter.yml
vendored
Normal file
36
dist/rules/litellm/embedded-exporter.yml
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
groups:
  # Alerting rules for LiteLLM's embedded Prometheus exporter.
  - name: EmbeddedExporter
    rules:
      # NOTE: every expression in this group aggregates across all series
      # (`sum` without a `by` clause, or `by (le)` fed into
      # `histogram_quantile`), so the resulting sample carries no `instance`
      # label. The summaries therefore do not interpolate
      # `{{ $labels.instance }}`, which would always render empty.

      # Soft budget warning: fires when the summed 24h increase of the spend
      # counter for the matching models stays above the threshold for 5m.
      # The threshold (1) is in USD. The `model` label carries the resolved
      # model name (post-routing).
      # PromQL `increase()` needs at least two samples inside the range to
      # return a value, so a brand-new counter series only contributes once
      # it has recorded growth across at least two scrapes, i.e. two distinct
      # request bursts that are at least one scrape interval apart.
      - alert: LitellmProviderSpendOverBudget
        expr: 'sum(increase(litellm_spend_metric_total{model=~"(claude-|anthropic/).*"}[24h])) > 1'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: LiteLLM provider spend over budget
          description: "Cumulative spend for an LLM provider has exceeded the daily budget threshold. Replace the regex `(claude-|anthropic/).*` with your provider's model-name pattern. Useful as a soft-warning when `provider_budget_config` hard-cap is unavailable or disabled.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Error-ratio warning: failed-request rate divided by total-request rate
      # (both over 5m, summed across all series) above 0.05 for 10m.
      - alert: LitellmProxyFailedRequestsRateHigh
        expr: 'sum(rate(litellm_proxy_failed_requests_metric_total[5m])) / sum(rate(litellm_proxy_total_requests_metric_total[5m])) > 0.05'
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: LiteLLM proxy failed requests rate high
          description: "LiteLLM proxy is returning failed responses to clients (>5% error rate over 5min). Investigate downstream LLM provider availability or auth issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Latency warning: p95 estimated from the request-latency histogram
      # buckets (5m rate, aggregated by `le`) above 10 for 10m. The
      # description text states the unit as seconds.
      - alert: LitellmRequestLatencyP95High
        expr: 'histogram_quantile(0.95, sum(rate(litellm_request_total_latency_metric_bucket[5m])) by (le)) > 10'
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: LiteLLM request latency p95 high
          description: "LiteLLM request total latency p95 exceeds 10 seconds over 5min. Check downstream LLM provider response-times and proxy queue-depth.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
Loading…
Reference in a new issue