From 5acbc04e107e0c2c4c64898dd368d5f9673e087b Mon Sep 17 00:00:00 2001 From: Pooya Dowlatabadi Date: Mon, 27 Jun 2022 14:20:19 +0430 Subject: [PATCH] Reverted percona- added changes to rules.yml --- _data/rules.yml | 16 ++++++++------ .../mongodb/percona-mongodb-exporter.yml | 21 ++++++------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/_data/rules.yml b/_data/rules.yml index 7292022..50375d8 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -759,32 +759,36 @@ groups: description: MongoDB instance is down query: 'mongodb_up == 0' severity: critical + - name: MongodbReplicaMemberHealthy + description: MongoDB replica member is not healthy + query: 'mongodb_rs_members_health == 0' + severity: critical - name: MongoDB replication lag description: Mongodb replication lag is more than 10s - query: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10' + query: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10' severity: critical - name: MongoDB replication headroom description: MongoDB replication headroom is <= 0 - query: '(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0' + query: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0' severity: critical - name: MongoDB number cursors open description: Too many cursors opened by MongoDB for clients (> 10k) - query: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000' + query: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000' severity: warning for: 2m - name: MongoDB cursors timeouts description: Too many cursors are timing out - query: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100' + query: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100' severity: warning for: 2m - name: MongoDB too many connections description: Too many connections (> 80%) - query: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80' + query: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80' severity: warning for: 2m - name: MongoDB virtual memory usage description: High memory usage - query: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3' + query: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3' severity: warning for: 2m diff --git a/dist/rules/mongodb/percona-mongodb-exporter.yml b/dist/rules/mongodb/percona-mongodb-exporter.yml index 1136dab..fd75d10 100644 --- a/dist/rules/mongodb/percona-mongodb-exporter.yml +++ b/dist/rules/mongodb/percona-mongodb-exporter.yml @@ -13,17 +13,8 @@ groups: summary: MongoDB Down (instance {{ $labels.instance }}) description: "MongoDB instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: MongodbReplicaMemberHealth - expr: 'mongodb_rs_members_health == 0' - for: 0m - labels: - severity: critical - annotations: - summary: MongoDB replica member not healthy {{ $labels.member_idx }} - description: "MongoDB replica member not healthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: MongodbReplicationLag - expr: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10' + expr: 'mongodb_mongod_replset_member_optime_date{state="PRIMARY"} - ON (set) mongodb_mongod_replset_member_optime_date{state="SECONDARY"} > 10' for: 0m labels: severity: critical @@ -41,7 +32,7 @@ groups: description: "MongoDB replication headroom is <= 0\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbNumberCursorsOpen - expr: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000' + expr: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10 * 1000' for: 2m labels: severity: warning @@ -50,7 +41,7 @@ groups: description: "Too many cursors opened by MongoDB for clients (> 10k)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbCursorsTimeouts - expr: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100' + expr: 'increase(mongodb_mongod_metrics_cursor_timed_out_total[1m]) > 100' for: 2m labels: severity: warning @@ -59,7 +50,7 @@ groups: description: "Too many cursors are timing out\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbTooManyConnections - expr: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80' + expr: 'avg by(instance) (rate(mongodb_connections{state="current"}[1m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80' for: 2m labels: severity: warning @@ -68,10 +59,10 @@ groups: description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbVirtualMemoryUsage - expr: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3' + expr: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3' for: 2m labels: severity: warning annotations: summary: MongoDB virtual memory usage (instance {{ $labels.instance }}) - description: "High memory usage\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "High memory usage\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" \ No newline at end of file