Add new ClickHouse alert rules for monitoring

This commit is contained in:
Samuel Berthe 2025-08-28 22:46:00 +02:00 committed by GitHub
parent 3d127f9b18
commit 6f7d265fe1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1338,6 +1338,11 @@ groups:
slug: embedded-exporter slug: embedded-exporter
doc_url: https://clickhouse.com/docs/en/operations/system-tables/metrics doc_url: https://clickhouse.com/docs/en/operations/system-tables/metrics
rules: rules:
# Fires when the `up` series for the job labelled "clickhouse" equals 0.
# The query is single-quoted so that the double quotes of the PromQL label
# matcher stay inside one YAML scalar instead of terminating it early.
- name: ClickHouse node down
  description: No metrics received from ClickHouse exporter for over 2 minutes.
  query: 'up{job="clickhouse"} == 0'
  severity: critical
  for: 2m
- name: ClickHouse Memory Usage Critical - name: ClickHouse Memory Usage Critical
description: Memory usage is critically high, over 90%. description: Memory usage is critically high, over 90%.
query: "ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 90" query: "ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 90"
@ -1412,6 +1417,34 @@ groups:
query: "increase(ClickHouseErrorMetric_RESOURCE_ACCESS_DENIED[5m]) > 0" query: "increase(ClickHouseErrorMetric_RESOURCE_ACCESS_DENIED[5m]) > 0"
severity: info severity: info
for: 0m for: 0m
# Alerts on any growth of the RejectedInserts profile-event counter
# measured over a 1-minute window.
- name: 'ClickHouse rejected insert queries'
  description: 'INSERTs rejected due to too many active data parts. Reduce insert frequency.'
  query: 'increase(ClickHouseProfileEvents_RejectedInserts[1m]) > 0'
  severity: warning
  for: 1m
# Alerts on any growth of the DelayedInserts profile-event counter
# measured over a 5-minute window.
- name: 'ClickHouse delayed insert queries'
  description: 'INSERTs delayed due to high number of active parts.'
  query: 'increase(ClickHouseProfileEvents_DelayedInserts[5m]) > 0'
  severity: warning
  for: 2m
# Alerts on any growth of the ZooKeeperHardwareExceptions profile-event
# counter measured over a 1-minute window.
# The description contains ": ", which a plain (unquoted) scalar cannot hold,
# so it is single-quoted to remain one string value.
- name: ClickHouse zookeeper hardware exception
  description: 'Zookeeper hardware exception: network issues communicating with ZooKeeper'
  query: "increase(ClickHouseProfileEvents_ZooKeeperHardwareExceptions[1m]) > 0"
  severity: critical
  for: 1m
# Network throughput alert: matches when either the send or the receive byte
# rate, computed over 1 minute, is above 100*1024*1024 bytes per second.
# The query uses a folded block scalar; the two lines are joined by a single
# space, giving the same one-line expression.
- name: 'ClickHouse high network usage'
  description: 'High network usage. ClickHouse network usage exceeds 100MB/s.'
  query: >-
    rate(ClickHouseProfileEvents_NetworkSendBytes[1m]) > 100*1024*1024
    or rate(ClickHouseProfileEvents_NetworkReceiveBytes[1m]) > 100*1024*1024
  severity: warning
  for: 2m
  comments: |
    Please replace the threshold with an appropriate value
# Alerts on any growth of the DistributedRejectedInserts profile-event
# counter measured over a 5-minute window.
- name: 'ClickHouse distributed rejected inserts'
  description: 'INSERTs into Distributed tables rejected due to pending bytes limit.'
  query: 'increase(ClickHouseProfileEvents_DistributedRejectedInserts[5m]) > 0'
  severity: critical
  for: 2m
- name: Zookeeper - name: Zookeeper
exporters: exporters: