mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-21 17:07:24 +08:00
Merge branch 'master' of github.com:samber/awesome-prometheus-alerts
This commit is contained in:
commit
67266bbca6
2 changed files with 25 additions and 12 deletions
|
|
@@ -759,32 +759,36 @@ groups:
|
|||
description: MongoDB instance is down
|
||||
query: 'mongodb_up == 0'
|
||||
severity: critical
|
||||
- name: Mongodb replica member unhealthy
|
||||
description: MongoDB replica member is not healthy
|
||||
query: 'mongodb_rs_members_health == 0'
|
||||
severity: critical
|
||||
- name: MongoDB replication lag
|
||||
description: Mongodb replication lag is more than 10s
|
||||
query: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10'
|
||||
query: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10'
|
||||
severity: critical
|
||||
- name: MongoDB replication headroom
|
||||
description: MongoDB replication headroom is <= 0
|
||||
query: '(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0'
|
||||
query: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0'
|
||||
severity: critical
|
||||
- name: MongoDB number cursors open
|
||||
description: Too many cursors opened by MongoDB for clients (> 10k)
|
||||
query: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000'
|
||||
query: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000'
|
||||
severity: warning
|
||||
for: 2m
|
||||
- name: MongoDB cursors timeouts
|
||||
description: Too many cursors are timing out
|
||||
query: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100'
|
||||
query: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100'
|
||||
severity: warning
|
||||
for: 2m
|
||||
- name: MongoDB too many connections
|
||||
description: Too many connections (> 80%)
|
||||
query: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80'
|
||||
query: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80'
|
||||
severity: warning
|
||||
for: 2m
|
||||
- name: MongoDB virtual memory usage
|
||||
description: High memory usage
|
||||
query: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3'
|
||||
query: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3'
|
||||
severity: warning
|
||||
for: 2m
|
||||
|
||||
|
|
|
|||
21
dist/rules/mongodb/percona-mongodb-exporter.yml
vendored
|
|
@@ -13,8 +13,17 @@ groups:
|
|||
summary: MongoDB Down (instance {{ $labels.instance }})
|
||||
description: "MongoDB instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbReplicaMemberUnhealthy
|
||||
expr: 'mongodb_rs_members_health == 0'
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Mongodb replica member unhealthy (instance {{ $labels.instance }})
|
||||
description: "MongoDB replica member is not healthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbReplicationLag
|
||||
expr: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10'
|
||||
expr: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10'
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
@@ -23,7 +32,7 @@ groups:
|
|||
description: "Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbReplicationHeadroom
|
||||
expr: '(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0'
|
||||
expr: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0'
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
@@ -32,7 +41,7 @@ groups:
|
|||
description: "MongoDB replication headroom is <= 0\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbNumberCursorsOpen
|
||||
expr: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000'
|
||||
expr: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000'
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
@@ -41,7 +50,7 @@ groups:
|
|||
description: "Too many cursors opened by MongoDB for clients (> 10k)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbCursorsTimeouts
|
||||
expr: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100'
|
||||
expr: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100'
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
@@ -50,7 +59,7 @@ groups:
|
|||
description: "Too many cursors are timing out\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbTooManyConnections
|
||||
expr: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80'
|
||||
expr: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80'
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
@@ -59,7 +68,7 @@ groups:
|
|||
description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
|
||||
- alert: MongodbVirtualMemoryUsage
|
||||
expr: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3'
|
||||
expr: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3'
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
|
|||
Loading…
Reference in a new issue