awesome-prometheus-alerts/dist/rules/cassandra/instaclustr-cassandra-exporter.yml
Pedro Torres ec13582bb5
fix typo in instaclustr-cassandra-exporter.yml (#332)
unvailable ==> unavailable
2023-01-13 18:35:06 +01:00

113 lines
5.4 KiB
YAML

# Prometheus alerting rules for Cassandra, using metrics whose names match the
# Instaclustr cassandra-exporter (group name: InstaclustrCassandraExporter).
#
# Each rule has the same shape:
#   expr        - PromQL condition that makes the alert pending
#   for         - how long the condition must hold before the alert fires
#   labels      - extra labels attached to the alert (here: severity)
#   annotations - templated summary/description text rendered at fire time
groups:
  - name: InstaclustrCassandraExporter
    rules:
      # Fires immediately (for: 0m) when the summed cassandra_endpoint_active
      # value for a (cluster, instance, endpoint) combination drops below 1.
      - alert: CassandraNodeIsUnavailable
        expr: 'sum(cassandra_endpoint_active) by (cassandra_cluster,instance,exported_endpoint) < 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Cassandra Node is unavailable (instance {{ $labels.instance }})
          description: "Cassandra Node is unavailable - {{ $labels.cassandra_cluster }} {{ $labels.exported_endpoint }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately when the estimated pending compactions of any
      # series exceeds 100.
      - alert: CassandraManyCompactionTasksArePending
        expr: 'cassandra_table_estimated_pending_compactions > 100'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Cassandra many compaction tasks are pending (instance {{ $labels.instance }})
          description: "Many Cassandra compaction tasks are pending - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when more than 15 commit log tasks stay pending for 2 minutes.
      - alert: CassandraCommitlogPendingTasks
        expr: 'cassandra_commit_log_pending_tasks > 15'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Cassandra commitlog pending tasks (instance {{ $labels.instance }})
          description: "Cassandra commitlog pending tasks - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the CompactionExecutor pool reports more than 15 blocked
      # tasks for 2 minutes.
      - alert: CassandraCompactionExecutorBlockedTasks
        expr: 'cassandra_thread_pool_blocked_tasks{pool="CompactionExecutor"} > 15'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Cassandra compaction executor blocked tasks (instance {{ $labels.instance }})
          description: "Some Cassandra compaction executor tasks are blocked - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Same check as above, for the MemtableFlushWriter pool.
      - alert: CassandraFlushWriterBlockedTasks
        expr: 'cassandra_thread_pool_blocked_tasks{pool="MemtableFlushWriter"} > 15'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Cassandra flush writer blocked tasks (instance {{ $labels.instance }})
          description: "Some Cassandra flush writer tasks are blocked - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the per-(cluster, instance) average of the timeouts metric
      # stays above 5 for 2 minutes.
      # NOTE(review): the expression compares the raw value of a metric whose
      # name ends in _total. If this is a monotonically increasing counter,
      # the alert will not clear once the threshold has been crossed; a
      # rate()/increase() based condition may be what is wanted - confirm.
      # NOTE(review): the description talks about connections between nodes,
      # while the metric name refers to client requests - confirm the wording.
      - alert: CassandraConnectionTimeoutsTotal
        expr: 'avg(cassandra_client_request_timeouts_total) by (cassandra_cluster,instance) > 5'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Cassandra connection timeouts total (instance {{ $labels.instance }})
          description: "Some connection between nodes are ending in timeout - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately when the storage exceptions metric changed value
      # more than once within the last minute.
      - alert: CassandraStorageExceptions
        expr: 'changes(cassandra_storage_exceptions_total[1m]) > 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Cassandra storage exceptions (instance {{ $labels.instance }})
          description: "Something is going wrong with cassandra storage - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 0.99-quantile tombstones-scanned series, averaged per
      # (instance, cluster, keyspace), stays above 100 for 2 minutes.
      - alert: CassandraTombstoneDump
        expr: 'avg(cassandra_table_tombstones_scanned{quantile="0.99"}) by (instance,cassandra_cluster,keyspace) > 100'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Cassandra tombstone dump (instance {{ $labels.instance }})
          description: "Cassandra tombstone dump - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the unavailable-exceptions metric for write operations
      # keeps changing (at least one change per 1m window) for 2 minutes.
      - alert: CassandraClientRequestUnavailableWrite
        expr: 'changes(cassandra_client_request_unavailable_exceptions_total{operation="write"}[1m]) > 0'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Cassandra client request unavailable write (instance {{ $labels.instance }})
          description: "Some Cassandra client requests are unavailable to write - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Read-side counterpart of the rule above.
      - alert: CassandraClientRequestUnavailableRead
        expr: 'changes(cassandra_client_request_unavailable_exceptions_total{operation="read"}[1m]) > 0'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Cassandra client request unavailable read (instance {{ $labels.instance }})
          description: "Some Cassandra client requests are unavailable to read - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the failures metric for write operations shows a positive
      # increase over 1m windows for 2 minutes.
      # The description previously said "Read failures" (copied from the read
      # rule below); it now names the operation this rule actually selects.
      - alert: CassandraClientRequestWriteFailure
        expr: 'increase(cassandra_client_request_failures_total{operation="write"}[1m]) > 0'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Cassandra client request write failure (instance {{ $labels.instance }})
          description: "Write failures have occurred, ensure there are not too many unavailable nodes - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Read-side counterpart of the rule above.
      - alert: CassandraClientRequestReadFailure
        expr: 'increase(cassandra_client_request_failures_total{operation="read"}[1m]) > 0'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Cassandra client request read failure (instance {{ $labels.instance }})
          description: "Read failures have occurred, ensure there are not too many unavailable nodes - {{ $labels.cassandra_cluster }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"