From 43427987af9bb8d85dc9fd4745c7feb4bf1ccd44 Mon Sep 17 00:00:00 2001
From: samber
Date: Wed, 29 Apr 2026 13:03:37 +0000
Subject: [PATCH] Publish

---
 dist/rules/litellm/embedded-exporter.yml | 36 ++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 dist/rules/litellm/embedded-exporter.yml

diff --git a/dist/rules/litellm/embedded-exporter.yml b/dist/rules/litellm/embedded-exporter.yml
new file mode 100644
index 0000000..5ed275f
--- /dev/null
+++ b/dist/rules/litellm/embedded-exporter.yml
@@ -0,0 +1,36 @@
+groups:
+
+- name: EmbeddedExporter
+
+
+  rules:
+
+    # Threshold 1 is expressed in USD. The `model` label holds the resolved (post-routing) model name.
+    # PromQL `increase()` only yields a positive value once the window holds ≥2 samples between which
+    # the counter grew, so a brand-new series needs ≥2 distinct request bursts ≥1 scrape cycle apart.
+    - alert: LitellmProviderSpendOverBudget
+      expr: 'sum(increase(litellm_spend_metric_total{model=~"(claude-|anthropic/).*"}[24h])) > 1'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: LiteLLM provider spend over budget (instance {{ $labels.instance }})
+        description: "Cumulative spend for an LLM provider has exceeded the daily budget threshold. Replace the regex `(claude-|anthropic/).*` with your provider's model-name pattern. Useful as a soft-warning when `provider_budget_config` hard-cap is unavailable or disabled.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: LitellmProxyFailedRequestsRateHigh
+      expr: 'sum(rate(litellm_proxy_failed_requests_metric_total[5m])) / sum(rate(litellm_proxy_total_requests_metric_total[5m])) > 0.05'
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: LiteLLM proxy failed requests rate high (instance {{ $labels.instance }})
+        description: "LiteLLM proxy is returning failed responses to clients (>5% error rate over 5min). Investigate downstream LLM provider availability or auth issues.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: LitellmRequestLatencyP95High
+      expr: 'histogram_quantile(0.95, sum(rate(litellm_request_total_latency_metric_bucket[5m])) by (le)) > 10'
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: LiteLLM request latency p95 high (instance {{ $labels.instance }})
+        description: "LiteLLM request total latency p95 exceeds 10 seconds over 5min. Check downstream LLM provider response-times and proxy queue-depth.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"