From 03fdabbfc5f814c9c862931379ba579d100dabfa Mon Sep 17 00:00:00 2001
From: Pooya
Date: Mon, 27 Jun 2022 19:59:07 +0430
Subject: [PATCH] Changed metric names to match new metric names. (#291)

* Changed alert names to match new alert names.

* Added MongodbReplicaMemberHealth to check health of replica members health which is added in new metrics

Co-authored-by: Pooya Dowlatabadi
Co-authored-by: Samuel Berthe
---
NOTE(review): this copy of the patch had its line breaks and indentation
collapsed; the layout below is reconstructed. The YAML indentation inside the
hunk is an assumption - confirm it against _data/rules.yml before `git am`.
The hunk header declares 32/36 lines but only 31/35 are visible here, so one
trailing context line appears to be missing from this copy.

NOTE(review): the replication-lag rule now divides the optimeDate difference
by 1000. The description promises a threshold in seconds, and the exporter
appears to report optimeDate in milliseconds - TODO confirm against the
exporter version in use. The headroom rule subtracts the same difference and
may need the same treatment; it is left as submitted.

 _data/rules.yml | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 7292022..94de519 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -759,32 +759,36 @@ groups:
                 description: MongoDB instance is down
                 query: 'mongodb_up == 0'
                 severity: critical
+              - name: Mongodb replica member unhealthy
+                description: MongoDB replica member is not healthy
+                query: 'mongodb_rs_members_health == 0'
+                severity: critical
               - name: MongoDB replication lag
                 description: Mongodb replication lag is more than 10s
-                query: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10'
+                query: '(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"}) / 1000 > 10'
                 severity: critical
               - name: MongoDB replication headroom
                 description: MongoDB replication headroom is <= 0
-                query: '(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0'
+                query: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0'
                 severity: critical
               - name: MongoDB number cursors open
                 description: Too many cursors opened by MongoDB for clients (> 10k)
-                query: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000'
+                query: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000'
                 severity: warning
                 for: 2m
               - name: MongoDB cursors timeouts
                 description: Too many cursors are timing out
-                query: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100'
+                query: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100'
                 severity: warning
                 for: 2m
               - name: MongoDB too many connections
                 description: Too many connections (> 80%)
-                query: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80'
+                query: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80'
                 severity: warning
                 for: 2m
               - name: MongoDB virtual memory usage
                 description: High memory usage
-                query: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3'
+                query: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3'
                 severity: warning
                 for: 2m