🤘 🎸 (#41)

🤘 🎸
This commit is contained in:
Samuel Berthe 2019-07-14 20:01:40 +02:00 committed by GitHub
commit b496d02c29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -65,9 +65,9 @@ services:
description: Context switching is growing on node (> 1000 / s)
query: 'rate(node_context_switches_total[5m]) > 1000'
severity: warning
# Matches any node whose reported total swap size is greater than zero.
# NOTE(review): no severity key is visible for this entry in this view —
# presumably it is the pre-change side of the diff hunk; confirm.
- name: Node has swap
description: Node has swap
query: 'node_memory_SwapTotal_bytes > 0'
# Warns when more than 80% of the node's swap is in use:
# (total - free) / total, expressed as a percentage.
# Uses the `_bytes`-suffixed memory metric names, the same naming as
# node_memory_SwapTotal_bytes.
- name: Swap is filling up
description: Swap is filling up (>80%)
query: '(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_SwapTotal_bytes) * 100) > 80'
severity: warning
- name: SystemD service failed
description: 'Service {{ $labels.name }} failed'
@@ -83,6 +83,22 @@ services:
description: A container has disappeared
query: 'time() - container_last_seen > 60'
severity: warning
# Warns when the 3-minute rate of container_cpu_usage_seconds_total, summed
# per (ip, name) and multiplied by 100, exceeds 80.
- name: Container CPU usage
description: Container CPU usage is above 80%
query: '(sum(rate(container_cpu_usage_seconds_total[3m])) BY (ip, name) * 100) > 80'
severity: warning
# Warns when, per ip, the summed container memory usage is more than 80% of
# the summed container memory limits.
- name: Container Memory usage
description: Container Memory usage is above 80%
query: '(sum(container_memory_usage_bytes) BY (ip) / sum(container_spec_memory_limit_bytes) BY (ip) * 100) > 80'
severity: warning
# Warns when, per ip, more than 80% of filesystem inodes are used.
# Usage is derived as 1 - free/total; dividing the total by itself would
# always yield 100 and make the alert fire permanently.
- name: Container Volume usage
description: Container Volume usage is above 80%
query: '((1 - (sum(container_fs_inodes_free) BY (ip) / sum(container_fs_inodes_total) BY (ip))) * 100) > 80'
severity: warning
# Warns when sum(container_fs_io_current) per (ip, name), multiplied by 100,
# exceeds 80.
# NOTE(review): presumably container_fs_io_current is a count of I/Os in
# progress rather than a ratio, so the "x100 > 80%" framing may not hold —
# confirm against the cAdvisor metric documentation.
- name: Container Volume IO usage
description: Container Volume IO usage is above 80%
query: '(sum(container_fs_io_current) BY (ip, name) * 100) > 80'
severity: warning
- name: Nginx
exporters:
@@ -187,6 +203,10 @@ services:
description: PostgreSQL has dead-locks
# Per-database deadlock rate over 1 minute, ignoring template databases.
# The selector must be a complete `label<op>"value"` matcher for the
# expression to parse.
query: 'rate(pg_stat_database_deadlocks{datname!~"template.*"}[1m]) > 0'
severity: warning
# Warns when, per database (template databases excluded), the average
# pg_stat_activity_max_tx_duration exceeds 60 — i.e. the "> 1min" of the
# description. The metric is compared directly instead of through rate():
# it is a duration value (presumably a gauge in seconds — confirm against the
# exporter), and rate() of a steadily growing duration stays around 1 and
# would never cross 60.
- name: Slow queries
description: PostgreSQL executes slow queries (> 1min)
query: 'avg(pg_stat_activity_max_tx_duration{datname!~"template.*"}) BY (datname) > 60'
severity: warning
- name: Redis
exporters:
@@ -225,8 +245,52 @@ services:
# MongoDB service entry: one exporter followed by its alert rules.
# NOTE(review): two doc_url keys follow. Read as plain YAML this is a
# duplicate key (most parsers silently keep the last one); presumably the
# first is the pre-change line of this diff hunk — confirm. The exporter name
# still says dcu/ while the second URL points at percona/.
- name: MongoDB
exporters:
- name: dcu/mongodb_exporter
doc_url: https://github.com/dcu/mongodb_exporter
doc_url: https://github.com/percona/mongodb_exporter
rules:
# Errors when the average PRIMARY optime date is more than 10 ahead of the
# average SECONDARY optime date.
- name: MongoDB replication lag
description: Mongodb replication lag is more than 10s
query: 'avg(mongodb_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_replset_member_optime_date{state="SECONDARY"}) > 10'
severity: error
# Errors when the average oplog window (tail timestamp - head timestamp)
# minus the PRIMARY/SECONDARY optime gap is zero or negative.
- name: MongoDB replication headroom
description: MongoDB replication headroom is <= 0
query: '(avg(mongodb_replset_oplog_tail_timestamp - mongodb_replset_oplog_head_timestamp) - (avg(mongodb_replset_member_optime_date{state="PRIMARY"}) - avg(mongodb_replset_member_optime_date{state="SECONDARY"}))) <= 0'
severity: error
# Errors when a replica-set member reports state 3.
- name: MongoDB replication Status 3
description: MongoDB Replication set member either perform startup self-checks, or transition from completing a rollback or resync
query: 'mongodb_replset_member_state == 3'
severity: error
# Errors when a replica-set member reports state 6.
- name: MongoDB replication Status 6
description: MongoDB Replication set member as seen from another member of the set, is not yet known
query: 'mongodb_replset_member_state == 6'
severity: error
# Errors when a replica-set member reports state 8.
- name: MongoDB replication Status 8
description: MongoDB Replication set member as seen from another member of the set, is unreachable
query: 'mongodb_replset_member_state == 8'
severity: error
# Errors when a replica-set member reports state 9.
- name: MongoDB replication Status 9
description: MongoDB Replication set member is actively performing a rollback. Data is not available for reads
query: 'mongodb_replset_member_state == 9'
severity: error
# Errors when a replica-set member reports state 10.
- name: MongoDB replication Status 10
description: MongoDB Replication set member was once in a replica set but was subsequently removed
query: 'mongodb_replset_member_state == 10'
severity: error
# Warns when mongodb_metrics_cursor_open for state "total_open" exceeds 10000.
- name: MongoDB number cursors open
description: Too many cursors opened by MongoDB for clients (> 10k)
query: 'mongodb_metrics_cursor_open{state="total_open"} > 10000'
severity: warning
# Warns when more than 100 cursors timed out within the last 10 minutes.
# The range selector uses the PromQL duration unit `m` for minutes.
- name: MongoDB cursors timeouts
description: Too many cursors are timing out
query: 'increase(mongodb_metrics_cursor_timed_out_total[10m]) > 100'
severity: warning
# Warns when mongodb_connections for state "current" exceeds 500.
- name: MongoDB too many connections
description: Too many connections
query: 'mongodb_connections{state="current"} > 500'
severity: warning
# Warns when, per ip, summed "virtual" memory is more than 3 times the summed
# "mapped" memory.
- name: MongoDB virtual memory usage
description: High memory usage
query: '(sum(mongodb_memory{type="virtual"}) BY (ip) / sum(mongodb_memory{type="mapped"}) BY (ip)) > 3'
severity: warning
- name: Elasticsearch
exporters:
@@ -300,6 +364,22 @@ services:
doc_url: https://github.com/bakins/php-fpm-exporter
rules:
# Java service entry: one exporter (client_java) and a single rule.
- name: Java
exporters:
- name: java-client
doc_url: https://github.com/prometheus/client_java
rules:
# Warns when used bytes divided by the heap-area maximum exceeds 0.8.
# Only the denominator carries the area="heap" selector.
- name: JVM memory filling up
description: JVM memory is filling up (> 80%)
query: 'jvm_memory_bytes_used / jvm_memory_bytes_max{area="heap"} > 0.8'
severity: warning
# ZFS service entry: metrics come from node_exporter; no rules are defined yet.
- name: ZFS
exporters:
- name: node-exporter
doc_url: https://github.com/prometheus/node_exporter
rules:
- name: Kubernetes
exporters:
- name: kubelet
@@ -312,6 +392,10 @@ services:
description: "{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available."
query: '100 * (kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) < 15 and predict_linear(kubelet_volume_stats_available_bytes[6h], 4 * 24 * 3600) < 0'
severity: error
# Errors when the ratio of ready replicas to current replicas of a
# StatefulSet is anything other than 1.
- name: StatefulSet down
description: A StatefulSet went down
query: '(kube_statefulset_status_replicas_ready / kube_statefulset_status_replicas_current) != 1'
severity: error
- name: Nomad
exporters:
@@ -434,6 +518,14 @@ services:
description: SSL certificate has expired already
query: 'probe_ssl_earliest_cert_expiry - time() <= 0'
severity: error
# Warns when probe_http_duration_seconds exceeds 2.
- name: Blackbox slow requests
description: Blackbox request took more than 2s
query: 'probe_http_duration_seconds > 2'
severity: warning
# Warns when probe_icmp_duration_seconds exceeds 2.
- name: Blackbox slow ping
description: Blackbox ping took more than 2s
query: 'probe_icmp_duration_seconds > 2'
severity: warning
- name: Windows Server
exporters: