From 69a76dea8ff8f67fc35c805ffce37728bc62b231 Mon Sep 17 00:00:00 2001 From: samber Date: Mon, 27 Jun 2022 15:29:56 +0000 Subject: [PATCH] Publish --- .../mongodb/percona-mongodb-exporter.yml | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/dist/rules/mongodb/percona-mongodb-exporter.yml b/dist/rules/mongodb/percona-mongodb-exporter.yml index 558adb5..ef25081 100644 --- a/dist/rules/mongodb/percona-mongodb-exporter.yml +++ b/dist/rules/mongodb/percona-mongodb-exporter.yml @@ -13,8 +13,17 @@ groups: summary: MongoDB Down (instance {{ $labels.instance }}) description: "MongoDB instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: MongodbReplicaMemberUnhealthy + expr: 'mongodb_rs_members_health == 0' + for: 0m + labels: + severity: critical + annotations: + summary: Mongodb replica member unhealthy (instance {{ $labels.instance }}) + description: "MongoDB replica member is not healthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: MongodbReplicationLag - expr: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10' + expr: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10' for: 0m labels: severity: critical @@ -23,7 +32,7 @@ groups: description: "Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbReplicationHeadroom - expr: '(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0' + expr: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0' for: 0m labels: severity: critical @@ -32,7 +41,7 @@ groups: description: "MongoDB replication headroom is <= 0\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbNumberCursorsOpen - expr: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000' + expr: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000' for: 2m labels: severity: warning @@ -41,7 +50,7 @@ groups: description: "Too many cursors opened by MongoDB for clients (> 10k)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbCursorsTimeouts - expr: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100' + expr: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100' for: 2m labels: severity: warning @@ -50,7 +59,7 @@ groups: description: "Too many cursors are timing out\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbTooManyConnections - expr: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80' + expr: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80' for: 2m labels: severity: warning @@ -59,7 +68,7 @@ groups: description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbVirtualMemoryUsage - expr: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3' + expr: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3' for: 2m labels: severity: warning