mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-26 11:27:00 +08:00
Update rules.yml
This commit is contained in:
parent
113d0699f1
commit
0971c4230a
1 changed file with 12 additions and 15 deletions
|
|
@@ -1000,62 +1000,59 @@ groups:
|
||||||
rules:
|
rules:
|
||||||
- name: 'Cassandra Node is unavailable
|
- name: 'Cassandra Node is unavailable
|
||||||
description: 'Cassandra Node is unavailable - {{ $labels.cassandra_cluster }} {{ $labels.exported_endpoint }}'
|
description: 'Cassandra Node is unavailable - {{ $labels.cassandra_cluster }} {{ $labels.exported_endpoint }}'
|
||||||
query: 'sum(min_over_time(cassandra_endpoint_active{cassandra_cluster=~".*"}[1m])) by (cassandra_cluster,instance,exported_endpoint) < 1'
|
query: 'sum(min_over_time(cassandra_endpoint_active{})) by (cassandra_cluster,instance,exported_endpoint) < 1'
|
||||||
for: 2m
|
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra many compaction tasks are pending'
|
- name: 'Cassandra many compaction tasks are pending'
|
||||||
description: 'Many Cassandra compaction tasks are pending - {{ $labels.cassandra_cluster }}'
|
description: 'Many Cassandra compaction tasks are pending - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'avg_over_time(cassandra_table_estimated_pending_compactions{cassandra_cluster=~".*"}[1m]) > 100'
|
query: 'cassandra_table_estimated_pending_compactions{} > 100'
|
||||||
for: 2m
|
|
||||||
severity: warning
|
severity: warning
|
||||||
- name: 'Cassandra commitlog pending tasks'
|
- name: 'Cassandra commitlog pending tasks'
|
||||||
description: 'Cassandra commitlog pending tasks - {{ $labels.cassandra_cluster }}'
|
description: 'Cassandra commitlog pending tasks - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'avg_over_time(cassandra_commit_log_pending_tasks{cassandra_cluster=~".*"}[1m]) > 15'
|
query: 'avg_over_time(cassandra_commit_log_pending_tasks{}[1m]) > 15'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: warning
|
severity: warning
|
||||||
- name: 'Cassandra compaction executor blocked tasks'
|
- name: 'Cassandra compaction executor blocked tasks'
|
||||||
description: 'Some Cassandra compaction executor tasks are blocked - {{ $labels.cassandra_cluster }}'
|
description: 'Some Cassandra compaction executor tasks are blocked - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'avg(avg_over_time(cassandra_thread_pool_blocked_tasks{cassandra_cluster=~".*",pool="CompactionExecutor"}[1m])) by (cassandra_cluster,instance) > 15'
|
query: 'avg(avg_over_time(cassandra_thread_pool_blocked_tasks{pool="CompactionExecutor"}[1m])) by (cassandra_cluster,instance) > 15'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: warning
|
severity: warning
|
||||||
- name: 'Cassandra flush writer blocked tasks'
|
- name: 'Cassandra flush writer blocked tasks'
|
||||||
description: 'Some Cassandra flush writer tasks are blocked - {{ $labels.cassandra_cluster }}'
|
description: 'Some Cassandra flush writer tasks are blocked - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'avg(avg_over_time(cassandra_thread_pool_blocked_tasks{cassandra_cluster=~".*",pool="MemtableFlushWriter"}[1m])) by (cassandra_cluster,instance) > 15'
|
query: 'avg(avg_over_time(cassandra_thread_pool_blocked_tasks{pool="MemtableFlushWriter"}[1m])) by (cassandra_cluster,instance) > 15'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: warning
|
severity: warning
|
||||||
- name: 'Cassandra connection timeouts total'
|
- name: 'Cassandra connection timeouts total'
|
||||||
description: 'Some connection between nodes are ending in timeout - {{ $labels.cassandra_cluster }}'
|
description: 'Some connection between nodes are ending in timeout - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'avg(avg_over_time(cassandra_client_request_timeouts_total{cassandra_cluster=~".*"}[1m])) by (cassandra_cluster,instance) > 5'
|
query: 'avg(avg_over_time(cassandra_client_request_timeouts_total{}[1m])) by (cassandra_cluster,instance) > 5'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra storage exceptions'
|
- name: 'Cassandra storage exceptions'
|
||||||
description: 'Something is going wrong with cassandra storage - {{ $labels.cassandra_cluster }}'
|
description: 'Something is going wrong with cassandra storage - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'changes(cassandra_storage_exceptions_total{cassandra_cluster=~".*"}[1m]) > 1'
|
query: 'changes(cassandra_storage_exceptions_total{}[1m]) > 1'
|
||||||
for: 2m
|
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra tombstone dump'
|
- name: 'Cassandra tombstone dump'
|
||||||
description: 'Cassandra tombstone dump - {{ $labels.cassandra_cluster }}'
|
description: 'Cassandra tombstone dump - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'avg(avg_over_time(cassandra_table_tombstones_scanned{cassandra_cluster=~".*",quantile="0.99"}[1m])) by (instance,cassandra_cluster,keyspace) > 100'
|
query: 'avg(avg_over_time(cassandra_table_tombstones_scanned{quantile="0.99"}[1m])) by (instance,cassandra_cluster,keyspace) > 100'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra client request unvailable write'
|
- name: 'Cassandra client request unvailable write'
|
||||||
description: 'Some Cassandra client requests are unvailable to write - {{ $labels.cassandra_cluster }}'
|
description: 'Some Cassandra client requests are unvailable to write - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'changes(cassandra_client_request_unavailable_exceptions_total{cassandra_cluster=~".*",operation="write"}[1m]) > 0'
|
query: 'changes(cassandra_client_request_unavailable_exceptions_total{operation="write"}[1m]) > 0'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra client request unvailable read'
|
- name: 'Cassandra client request unvailable read'
|
||||||
description: 'Some Cassandra client requests are unvailable to read - {{ $labels.cassandra_cluster }}'
|
description: 'Some Cassandra client requests are unvailable to read - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'changes(cassandra_client_request_unavailable_exceptions_total{cassandra_cluster=~".*",operation="read"}[1m]) > 0'
|
query: 'changes(cassandra_client_request_unavailable_exceptions_total{operation="read"}[1m]) > 0'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra client request write failure'
|
- name: 'Cassandra client request write failure'
|
||||||
description: 'Read failures have occurred, ensure there are not too many unavailable nodes - {{ $labels.cassandra_cluster }}'
|
description: 'Read failures have occurred, ensure there are not too many unavailable nodes - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'increase(cassandra_client_request_failures_total{job="cassandra",envtype="literatum",operation="write"}[1m]) > 0'
|
query: 'increase(cassandra_client_request_failures_total{operation="write"}[1m]) > 0'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: 'Cassandra client request read failure'
|
- name: 'Cassandra client request read failure'
|
||||||
description: 'Read failures have occurred, ensure there are not too many unavailable nodes - {{ $labels.cassandra_cluster }}'
|
description: 'Read failures have occurred, ensure there are not too many unavailable nodes - {{ $labels.cassandra_cluster }}'
|
||||||
query: 'increase(cassandra_client_request_failures_total{job="cassandra",envtype="literatum",operation="read"}[1m]) > 0'
|
query: 'increase(cassandra_client_request_failures_total{operation="read"}[1m]) > 0'
|
||||||
for: 2m
|
for: 2m
|
||||||
severity: critical
|
severity: critical
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue