groups: - name: PerconaMongodbExporter rules: - alert: MongodbDown expr: 'mongodb_up == 0' for: 0m labels: severity: critical annotations: summary: MongoDB Down (instance {{ $labels.instance }}) description: "MongoDB instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbReplicaMemberUnhealthy expr: 'mongodb_rs_members_health == 0' for: 0m labels: severity: critical annotations: summary: Mongodb replica member unhealthy (instance {{ $labels.instance }}) description: "MongoDB replica member is not healthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbReplicationLag expr: 'mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"} > 10' for: 0m labels: severity: critical annotations: summary: MongoDB replication lag (instance {{ $labels.instance }}) description: "Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbReplicationHeadroom expr: 'sum(avg(mongodb_oplog_stats_start - mongodb_oplog_stats_end)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (set) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0' for: 0m labels: severity: critical annotations: summary: MongoDB replication headroom (instance {{ $labels.instance }}) description: "MongoDB replication headroom is <= 0\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbNumberCursorsOpen expr: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000' for: 2m labels: severity: warning annotations: summary: MongoDB number cursors open (instance {{ $labels.instance }}) description: "Too many cursors opened by MongoDB for clients (> 10k)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbCursorsTimeouts expr: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100' for: 2m labels: severity: warning annotations: summary: MongoDB cursors timeouts (instance {{ $labels.instance }}) description: "Too many cursors are timing out\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbTooManyConnections expr: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80' for: 2m labels: severity: warning annotations: summary: MongoDB too many connections (instance {{ $labels.instance }}) description: "Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: MongodbVirtualMemoryUsage expr: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3' for: 2m labels: severity: warning annotations: summary: MongoDB virtual memory usage (instance {{ $labels.instance }}) description: "High memory usage\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"