mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-24 18:36:59 +08:00
doc: improve pulsar doc
This commit is contained in:
parent
074e3e6d04
commit
4662cd2812
2 changed files with 24 additions and 22 deletions
|
|
@ -36,6 +36,7 @@ Collection available here: **[https://awesome-prometheus-alerts.grep.to](https:/
|
||||||
- [Cassandra](https://awesome-prometheus-alerts.grep.to/rules#cassandra)
|
- [Cassandra](https://awesome-prometheus-alerts.grep.to/rules#cassandra)
|
||||||
- [Zookeeper](https://awesome-prometheus-alerts.grep.to/rules#zookeeper)
|
- [Zookeeper](https://awesome-prometheus-alerts.grep.to/rules#zookeeper)
|
||||||
- [Kafka](https://awesome-prometheus-alerts.grep.to/rules#kafka)
|
- [Kafka](https://awesome-prometheus-alerts.grep.to/rules#kafka)
|
||||||
|
- [Pulsar](https://awesome-prometheus-alerts.grep.to/rules#pulsar)
|
||||||
- [Solr](https://awesome-prometheus-alerts.grep.to/rules#solr)
|
- [Solr](https://awesome-prometheus-alerts.grep.to/rules#solr)
|
||||||
|
|
||||||
#### Reverse proxies and load balancers
|
#### Reverse proxies and load balancers
|
||||||
|
|
|
||||||
|
|
@ -1200,57 +1200,59 @@ groups:
|
||||||
|
|
||||||
- name: Pulsar
|
- name: Pulsar
|
||||||
exporters:
|
exporters:
|
||||||
- rules:
|
- name: embedded exporter
|
||||||
- name: Pulsar Subscription High Number Of Backlog Entries
|
doc_url: https://pulsar.apache.org/docs/reference-metrics/
|
||||||
|
rules:
|
||||||
|
- name: Pulsar subscription high number of backlog entries
|
||||||
|
description: "The number of subscription backlog entries is over 5k"
|
||||||
query: sum(pulsar_subscription_back_log) by (subscription) > 5000
|
query: sum(pulsar_subscription_back_log) by (subscription) > 5000
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: warning
|
severity: warning
|
||||||
description: "The number of subscription backlog entries is over 5k\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar subscription very high number of backlog entries
|
||||||
- name: Pulsar Subscription Very High Number Of Backlog Entries
|
description: "The number of subscription backlog entries is over 100k"
|
||||||
query: sum(pulsar_subscription_back_log) by (subscription) > 100000
|
query: sum(pulsar_subscription_back_log) by (subscription) > 100000
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "The number of subscription backlog entries is over 100k\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar topic large backlog storage size
|
||||||
- name: Pulsar Topic Large Backlog Storage Size
|
description: "The topic backlog storage size is over 5 GB"
|
||||||
query: sum(pulsar_storage_size > 5*1024*1024*1024) by (topic)
|
query: sum(pulsar_storage_size > 5*1024*1024*1024) by (topic)
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: warning
|
severity: warning
|
||||||
description: "The topic backlog storage size is over 5 GB\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar topic very large backlog storage size
|
||||||
- name: PulsarTopicVeryLargeBacklogStorageSize
|
description: "The topic backlog storage size is over 20 GB"
|
||||||
query: sum(pulsar_storage_size > 20*1024*1024*1024) by (topic)
|
query: sum(pulsar_storage_size > 20*1024*1024*1024) by (topic)
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "The topic backlog storage size is over 20 GB\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar high write latency
|
||||||
- name: Pulsar High Write Latency
|
description: "Messages cannot be written in a timely fashion"
|
||||||
query: sum(pulsar_storage_write_latency_overflow > 0) by (topic)
|
query: sum(pulsar_storage_write_latency_overflow > 0) by (topic)
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "Messages cannot be written in a timely fashion\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar large message payload
|
||||||
- name: Pulsar Large Message Payload
|
description: "Observing large message payload (> 1MB)"
|
||||||
query: sum(pulsar_entry_size_overflow > 0) by (topic)
|
query: sum(pulsar_entry_size_overflow > 0) by (topic)
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: warning
|
severity: warning
|
||||||
description: "Observing large message payload (> 1MB)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar high ledger disk usage
|
||||||
- name: PulsarHighLedgerDiskUsage
|
description: "Observing Ledger Disk Usage (> 75%)"
|
||||||
query: sum(bookie_ledger_dir__pulsar_data_bookkeeper_ledgers_usage) by (kubernetes_pod_name) > 75
|
query: sum(bookie_ledger_dir__pulsar_data_bookkeeper_ledgers_usage) by (kubernetes_pod_name) > 75
|
||||||
for: 1h
|
for: 1h
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "Observing Ledger Disk Usage (> 75%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar read only bookies
|
||||||
- name: Pulsar Read Only Bookies
|
description: "Observing Readonly Bookies"
|
||||||
query: count(bookie_SERVER_STATUS{} == 0) by (pod)
|
query: count(bookie_SERVER_STATUS{} == 0) by (pod)
|
||||||
for: 5m
|
for: 5m
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "Observing Readonly Bookies\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar high number of function errors
|
||||||
- name: Pulsar High Number Of Function Errors
|
description: "Observing more than 10 Function errors per minute"
|
||||||
query: sum((rate(pulsar_function_user_exceptions_total{}[1m]) + rate(pulsar_function_system_exceptions_total{}[1m])) > 10) by (name)
|
query: sum((rate(pulsar_function_user_exceptions_total{}[1m]) + rate(pulsar_function_system_exceptions_total{}[1m])) > 10) by (name)
|
||||||
for: 1m
|
for: 1m
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "Observing more than 10 Function errors per minute\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
- name: Pulsar high number of sink errors
|
||||||
- name: Pulsar High Number Of Sink Errors
|
description: "Observing more than 10 Sink errors per minute"
|
||||||
query: sum(rate(pulsar_sink_sink_exceptions_total{}[1m]) > 10) by (name)
|
query: sum(rate(pulsar_sink_sink_exceptions_total{}[1m]) > 10) by (name)
|
||||||
for: 1m
|
for: 1m
|
||||||
severity: critical
|
severity: critical
|
||||||
description: "Observing more than 10 Sink errors per minute\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- name: Solr
|
- name: Solr
|
||||||
exporters:
|
exporters:
|
||||||
|
|
@ -2206,4 +2208,3 @@ groups:
|
||||||
* FAILURE 2 false - The build had a fatal error.
|
* FAILURE 2 false - The build had a fatal error.
|
||||||
* NOT_BUILT 3 false - The module was not built.
|
* NOT_BUILT 3 false - The module was not built.
|
||||||
* ABORTED 4 false - The build was manually aborted.
|
* ABORTED 4 false - The build was manually aborted.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue