More accurate alerts

Added `mongodb instance down` alert and changed the `too many
connections` alert to fire when the connections are more than 80% of the
available connections.
Removed the `mongodb_mongod_replset_member_state`-based alerts, as I don't
have enough information on them.
This commit is contained in:
Yashar Nesabian 2020-08-09 10:35:39 +04:30
parent 3ce1084f5b
commit d6b39a7f3f

View file

@ -538,6 +538,10 @@ groups:
- name: percona/mongodb_exporter
doc_url: https://github.com/percona/mongodb_exporter
rules:
# Critical alert that fires when the mongodb_up metric equals 0.
- name: MongoDB Down
description: MongoDB instance is down
query: 'mongodb_up == 0'
severity: critical
- name: MongoDB replication lag
description: Mongodb replication lag is more than 10s
query: 'avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}) > 10'
@ -546,26 +550,6 @@ groups:
description: MongoDB replication headroom is <= 0
query: '(avg(mongodb_mongod_replset_oplog_tail_timestamp - mongodb_mongod_replset_oplog_head_timestamp) - (avg(mongodb_mongod_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_mongod_replset_member_optime_date{state="SECONDARY"}))) <= 0'
severity: critical
# Critical alert that fires when mongodb_mongod_replset_member_state equals 3.
- name: MongoDB replication Status 3
description: MongoDB Replication set member either perform startup self-checks, or transition from completing a rollback or resync
query: "mongodb_mongod_replset_member_state == 3"
severity: critical
# Critical alert that fires when mongodb_mongod_replset_member_state equals 6.
- name: MongoDB replication Status 6
description: MongoDB Replication set member as seen from another member of the set, is not yet known
query: "mongodb_mongod_replset_member_state == 6"
severity: critical
# Critical alert that fires when mongodb_mongod_replset_member_state equals 8.
- name: MongoDB replication Status 8
description: MongoDB Replication set member as seen from another member of the set, is unreachable
query: "mongodb_mongod_replset_member_state == 8"
severity: critical
# Critical alert that fires when mongodb_mongod_replset_member_state equals 9.
- name: MongoDB replication Status 9
description: MongoDB Replication set member is actively performing a rollback. Data is not available for reads
query: "mongodb_mongod_replset_member_state == 9"
severity: critical
# Critical alert that fires when mongodb_mongod_replset_member_state equals 10.
- name: MongoDB replication Status 10
description: MongoDB Replication set member was once in a replica set but was subsequently removed
query: "mongodb_mongod_replset_member_state == 10"
severity: critical
- name: MongoDB number cursors open
description: Too many cursors opened by MongoDB for clients (> 10k)
query: 'mongodb_mongod_metrics_cursor_open{state="total"} > 10000'
@ -576,11 +560,11 @@ groups:
severity: warning
# Warning alert on connection usage. Two query lines are shown; this looks
# like a diff view with the old line followed by its replacement (TODO confirm).
#   1st query: fixed threshold, fires above 500 connections in state "current".
#   2nd query: per instance, the 5-minute maximum of state="current" divided by
#              the sum of all mongodb_connections series, times 100; fires
#              above 80.
- name: MongoDB too many connections
description: Too many connections
query: 'mongodb_connections{state="current"} > 500'
query: 'avg by(instance) (max_over_time(mongodb_connections{state="current"}[5m])) / avg by(instance) (sum (mongodb_connections) by (instance)) * 100 > 80'
severity: warning
# Warning alert that fires when summed type="virtual" memory divided by summed
# type="mapped" memory exceeds 3. Two query lines are shown; this looks like a
# diff view with the old line followed by its replacement (TODO confirm). They
# differ only in the grouping label: `ip` in the first, `instance` in the second.
- name: MongoDB virtual memory usage
description: High memory usage
query: '(sum(mongodb_memory{type="virtual"}) BY (ip) / sum(mongodb_memory{type="mapped"}) BY (ip)) > 3'
query: '(sum(mongodb_memory{type="virtual"}) BY (instance) / sum(mongodb_memory{type="mapped"}) BY (instance)) > 3'
severity: warning
- name: dcu/mongodb_exporter