Changed metric names to match new metric names. (#291)

* Changed alert names to match new alert names.

* Added MongodbReplicaMemberHealth to check the health of replica members, which is exposed by the new metrics

Co-authored-by: Pooya Dowlatabadi <pooya.dowlatabadi@arvancloud.com>
Co-authored-by: Samuel Berthe <dev@samuel-berthe.fr>
This commit is contained in:
Pooya 2022-06-27 19:59:07 +04:30 committed by GitHub
parent 4201302285
commit 03fdabbfc5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -759,32 +759,36 @@ groups:
description: MongoDB instance is down
query: 'mongodb_up == 0'
severity: critical
# Critical rule that matches any series where mongodb_rs_members_health equals 0.
# NOTE(review): this name is spelled "Mongodb" while the neighbouring rules use "MongoDB" — confirm whether the casing matters to whatever consumes the name.
- name: Mongodb replica member unhealthy
description: MongoDB replica member is not healthy
query: 'mongodb_rs_members_health == 0'
severity: critical
# Critical rule that matches when the PRIMARY optime minus a SECONDARY optime (joined on the `set` label) is greater than 10.
# The two `query:` lines look like the before/after sides of the metric rename shown by this diff view — presumably only the second one is kept in the file; confirm.
# NOTE(review): the description says "10s", so the bare `> 10` assumes optimeDate is expressed in seconds — confirm the exporter's unit (if it reports milliseconds, the difference would need dividing by 1000).
- name: MongoDB replication lag
description: Mongodb replication lag is more than 10s
query: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10'
query: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10'
severity: critical
# Critical rule that matches when (oplog span) minus (PRIMARY-to-SECONDARY optime gap) is <= 0.
# The two `query:` lines look like the before/after sides of the metric rename shown by this diff view — presumably only the second one is kept in the file; confirm.
# NOTE(review): the renamed query computes `start - end` where the earlier one computed `head - tail` — confirm which of mongodb_oplog_stats_start / _end is the larger timestamp so the span comes out positive.
# NOTE(review): avg() without a grouping clause already yields a single series, so the surrounding sum() looks redundant — verify intent.
- name: MongoDB replication headroom
description: MongoDB replication headroom is <= 0
query: '(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0'
query: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0'
severity: critical
# Warning rule (with `for: 2m`) that matches when the "total" open-cursor series is greater than 10 * 1000.
# The two `query:` lines look like the before/after sides of the metric rename shown by this diff view: the metric name and the selecting label (state -> csr_type) both change, the threshold does not.
- name: MongoDB number cursors open
description: Too many cursors opened by MongoDB for clients (> 10k)
query: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000'
query: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000'
severity: warning
for: 2m
# Warning rule (with `for: 2m`) that matches when increase() of the timed-out cursor series over a 1m window is greater than 100.
# The two `query:` lines look like the before/after sides of the metric rename shown by this diff view; only the metric name differs between them.
- name: MongoDB cursors timeouts
description: Too many cursors are timing out
query: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100'
query: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100'
severity: warning
for: 2m
# Warning rule (with `for: 2m`) that matches when current connections exceed 80% of the per-instance sum of the connection series.
# The first `query:` line is the pre-rename side of this diff view and is kept verbatim; the second is the post-rename expression.
# The post-rename expression reads the current-connection series directly: wrapping it in rate() would yield a per-second change rather than a connection count, so the ratio would not be the usage percentage the description promises.
# NOTE(review): the denominator sums every conn_type series of mongodb_ss_connections — confirm that this sum really represents the connection capacity for the exporter in use.
- name: MongoDB too many connections
description: Too many connections (> 80%)
query: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80'
query: 'avg by(instance) (mongodb_ss_connections{conn_type="current"}) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80'
severity: warning
for: 2m
# Warning rule (with `for: 2m`) that matches when per-instance virtual memory is more than 3 times the divisor metric.
# The two `query:` lines look like the before/after sides of the metric rename shown by this diff view; note the divisor also changes, from the "mapped" memory series to mongodb_ss_mem_resident.
- name: MongoDB virtual memory usage
description: High memory usage
query: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3'
query: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3'
severity: warning
for: 2m