From 0bc4a1633c1781c3e69bcb266d9dff24be834a15 Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Mon, 22 Oct 2018 00:53:32 +0200 Subject: [PATCH] Jekyll based doc --- CONTRIBUTING.md | 14 ++ Gemfile | 2 + Gemfile.lock | 248 ++++++++++++++++++++++++++++++++ README.md | 110 +------------- _config.yml | 5 +- _data/rules.yml | 286 +++++++++++++++++++++++++++++++++++++ _layouts/default.html | 54 +++++++ alertmanager.md | 56 ++++++++ assets/prometheus-logo.png | Bin 0 -> 17587 bytes index.md | 32 +++++ rules.md | 105 ++++++++++++++ 11 files changed, 804 insertions(+), 108 deletions(-) create mode 100644 CONTRIBUTING.md create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 _data/rules.yml create mode 100644 _layouts/default.html create mode 100644 alertmanager.md create mode 100644 assets/prometheus-logo.png create mode 100644 index.md create mode 100644 rules.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..89f4ccd --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,14 @@ + +# Contributing + +## Adding alerting rule + +Rules are here: `_data/rules.yml`. 
+ +## Run locally + +``` +gem install bundler +bundle install +bundle exec jekyll serve +``` diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..37f5eaa --- /dev/null +++ b/Gemfile @@ -0,0 +1,2 @@ +source 'https://rubygems.org' +gem 'github-pages', group: :jekyll_plugins diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..46cf81f --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,248 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (4.2.10) + i18n (~> 0.7) + minitest (~> 5.1) + thread_safe (~> 0.3, >= 0.3.4) + tzinfo (~> 1.1) + addressable (2.5.2) + public_suffix (>= 2.0.2, < 4.0) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.11.1) + colorator (1.1.0) + commonmarker (0.17.13) + ruby-enum (~> 0.5) + concurrent-ruby (1.0.5) + dnsruby (1.61.2) + addressable (~> 2.5) + em-websocket (0.5.1) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0.6.0) + ethon (0.11.0) + ffi (>= 1.3.0) + eventmachine (1.2.7) + execjs (2.7.0) + faraday (0.15.3) + multipart-post (>= 1.2, < 3) + ffi (1.9.25) + forwardable-extended (2.6.0) + gemoji (3.0.0) + github-pages (192) + activesupport (= 4.2.10) + github-pages-health-check (= 1.8.1) + jekyll (= 3.7.4) + jekyll-avatar (= 0.6.0) + jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.1.5) + jekyll-default-layout (= 0.1.4) + jekyll-feed (= 0.10.0) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.9.4) + jekyll-mentions (= 1.4.1) + jekyll-optional-front-matter (= 0.3.0) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.2.0) + jekyll-redirect-from (= 0.14.0) + jekyll-relative-links (= 0.5.3) + jekyll-remote-theme (= 0.3.1) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.5.0) + jekyll-sitemap (= 1.2.0) + jekyll-swiss (= 0.4.0) + jekyll-theme-architect (= 0.1.1) + jekyll-theme-cayman (= 0.1.1) + jekyll-theme-dinky (= 0.1.1) + jekyll-theme-hacker (= 0.1.1) + jekyll-theme-leap-day (= 0.1.1) + jekyll-theme-merlot (= 0.1.1) + 
jekyll-theme-midnight (= 0.1.1) + jekyll-theme-minimal (= 0.1.1) + jekyll-theme-modernist (= 0.1.1) + jekyll-theme-primer (= 0.5.3) + jekyll-theme-slate (= 0.1.1) + jekyll-theme-tactile (= 0.1.1) + jekyll-theme-time-machine (= 0.1.1) + jekyll-titles-from-headings (= 0.5.1) + jemoji (= 0.10.1) + kramdown (= 1.17.0) + liquid (= 4.0.0) + listen (= 3.1.5) + mercenary (~> 0.3) + minima (= 2.5.0) + nokogiri (>= 1.8.2, < 2.0) + rouge (= 2.2.1) + terminal-table (~> 1.4) + github-pages-health-check (1.8.1) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (~> 4.0) + public_suffix (~> 2.0) + typhoeus (~> 1.3) + html-pipeline (2.8.4) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.6.0) + i18n (0.9.5) + concurrent-ruby (~> 1.0) + jekyll (3.7.4) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 0.7) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (~> 1.14) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + jekyll-avatar (0.6.0) + jekyll (~> 3.0) + jekyll-coffeescript (1.1.1) + coffee-script (~> 2.2) + coffee-script-source (~> 1.11.1) + jekyll-commonmark (1.2.0) + commonmarker (~> 0.14) + jekyll (>= 3.0, < 4.0) + jekyll-commonmark-ghpages (0.1.5) + commonmarker (~> 0.17.6) + jekyll-commonmark (~> 1) + rouge (~> 2) + jekyll-default-layout (0.1.4) + jekyll (~> 3.0) + jekyll-feed (0.10.0) + jekyll (~> 3.3) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.9.4) + jekyll (~> 3.1) + octokit (~> 4.0, != 4.4.0) + jekyll-mentions (1.4.1) + html-pipeline (~> 2.3) + jekyll (~> 3.0) + jekyll-optional-front-matter (0.3.0) + jekyll (~> 3.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.2.0) + jekyll (~> 3.0) + jekyll-redirect-from (0.14.0) + jekyll (~> 3.3) + jekyll-relative-links (0.5.3) + jekyll (~> 3.3) + jekyll-remote-theme (0.3.1) + jekyll (~> 3.5) + rubyzip (>= 1.2.1, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag 
(2.5.0) + jekyll (~> 3.3) + jekyll-sitemap (1.2.0) + jekyll (~> 3.3) + jekyll-swiss (0.4.0) + jekyll-theme-architect (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.5.3) + jekyll (~> 3.5) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.1) + jekyll (~> 3.3) + jekyll-watch (2.1.2) + listen (~> 3.0) + jemoji (0.10.1) + gemoji (~> 3.0) + html-pipeline (~> 2.2) + jekyll (~> 3.0) + kramdown (1.17.0) + liquid (4.0.0) + listen (3.1.5) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + ruby_dep (~> 1.2) + mercenary (0.3.6) + mini_portile2 (2.3.0) + minima (2.5.0) + jekyll (~> 3.5) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.11.3) + multipart-post (2.0.0) + nokogiri (1.8.5) + mini_portile2 (~> 2.3.0) + octokit (4.13.0) + sawyer (~> 0.8.0, >= 0.5.3) + pathutil (0.16.1) + forwardable-extended (~> 2.6) + public_suffix (2.0.5) + rb-fsevent (0.10.3) + rb-inotify (0.9.10) + ffi (>= 0.5.0, < 2) + rouge (2.2.1) + ruby-enum (0.7.2) + i18n + ruby_dep (1.5.0) + rubyzip (1.2.2) + safe_yaml (1.0.4) + sass (3.6.0) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + 
rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.8.1) + addressable (>= 2.3.5, < 2.6) + faraday (~> 0.8, < 1.0) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + thread_safe (0.3.6) + typhoeus (1.3.0) + ethon (>= 0.9.0) + tzinfo (1.2.5) + thread_safe (~> 0.1) + unicode-display_width (1.4.0) + +PLATFORMS + ruby + +DEPENDENCIES + github-pages + +BUNDLED WITH + 1.16.6 diff --git a/README.md b/README.md index 3a4edf8..1f42ae8 100644 --- a/README.md +++ b/README.md @@ -2,112 +2,8 @@ (WIP) +[https://awesome-prometheus-alerts.grep.to](https://awesome-prometheus-alerts.grep.to) + ## Todo -- Write full alert rules in yml files -- Make a small website with form for each rule, to build custom alerts (criticity, thresolds, instance...) - -## Queries - -### Prometheus internal - -- `up == 0` // killed exporters - -### node-exporter - -Memory: - -- `(node_memory_MemFree{} + node_memory_Cached{} + node_memory_Buffers{}) / node_memory_MemTotal{} * 100 < 5` - -Network: - -- `sum by (instance) (irate(node_network_transmit_bytes{}[2m])) / 1024 / 1024 > 100` -- `sum by (instance) (irate(node_network_receive_bytes{}[2m])) / 1024 / 1024 > 100` - -Disk: - -- `sum by (instance) (irate(node_disk_bytes_read{}[2m])) / 1024 / 1024 > 50` -- `sum by (instance) (irate(node_disk_bytes_written{}[2m])) / 1024 / 1024 > 50` -- `node_filesystem_free{mountpoint ="/rootfs"} / node_filesystem_size{mountpoint ="/rootfs"} * 100 < 10` // gb -- `node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100` // inodes -- `rate(node_disk_read_time_ms{}[1m]) / rate(node_disk_reads_completed{}[1m]) > 100` // too much latency -- `rate(node_disk_write_time_ms{}[1m]) / rate(node_disk_writes_completed{}[1m]) > 100` // too much latency - -CPU: - -- `avg by (instance) (sum by (cpu) (rate(node_cpu{mode!="idle"}[2m]))) * 100 > 75` // load -- `rate(node_context_switches{}[5m]) > 1000` // nbr context switch per second - -### cAdvisor - 
-- `time() - container_last_seen{} > 60` // get killed container - -### Nginx - -- `rate(nginx_http_requests_total{status=~"^4.."}[1m]) > 10` // get 4xx http requests -- `rate(nginx_http_requests_total{status=~"^5.."}[1m]) > 10` // get 5xx http requests - -### Rabbitmq (kbudde/rabbitmq-exporter) - -- `rabbitmq_up{} == 0` -- `rabbitmq_running{} >= 2` // cluster -- `rabbitmq_partitions{} > 0` // cluster got partition :-( -- `rabbitmq_node_mem_used{} / rabbitmq_node_mem_limit{} * 100 > 90` // too much ram used -- `rabbitmq_connectionsTotal{} > 1000` - -- `rabbitmq_queue_messages_unacknowledged{queue="my-queue"} > 5` -- `rabbitmq_queue_messages_ready{queue="my-queue"} > 1000` // more consumers needed -- `time() - rabbitmq_queue_head_message_timestamp{queue="my-queue"} > 60` // takes more than 1min to consume messages -- `rabbitmq_queue_consumers{} == 0` // no consumer on queue -- `rate(rabbitmq_exchange_messages_published_in_total{exchange="my-exchange"}[1m]) < 5` // no activity on exchange - -### PostgreSQL (wrouesnel/postgres_exporter) - -- `pg_up{} == 0` -- `pg_replication_lag{} > 10` // more than 10s lag between master and slave -- `time() - pg_stat_user_tables_last_autovacuum{} > 60 * 60 * 24` // did not vaccum for 1 day -- `time() - pg_stat_user_tables_last_autoanalyze{} > 60 * 60 * 24` // did not analyse for 1 day -- `sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) > 100` // too many connections -- `sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) < 5` // connections number too small -- `rate(pg_stat_database_deadlocks{pg_stat_database_de}[1m]) > 0` - -### Redis (oliver006/redis_exporter) - -- `redis_up{} == 0` -- `time() - redis_rdb_last_save_timestamp_seconds{} > 60 * 60 * 24` // did not backup for 1 day -- `redis_memory_used_bytes{} / redis_total_system_memory_bytes{} * 100 > 90` -- `redis_connected_slaves{}` -- `delta(redis_connected_slaves{}[1m]) < 0` // slaved killed -- `redis_connected_clients{} > 100` // 
too many connections -- `redis_connected_clients{} < 5` // connections number too small -- `increase(redis_rejected_connections_total{}[1m]) > 0` // rejected connections - -### MySQL - -### Elasticsearch - -### MongoDB - -### Apache - -### HaProxy - -### Traefik - -### PHP-FPM - -### Kubernetes - -### Nomad - -### Consul - -### Etcd - -### Zookeeper - -### Linkerd - -### Istio - -### Blackbox +- In Jekyll, create an alert rule builder, to create custom alerts (criticality, thresholds, instance...) diff --git a/_config.yml b/_config.yml index c419263..fd86df3 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1,4 @@ -theme: jekyll-theme-cayman \ No newline at end of file +theme: jekyll-theme-cayman + +title: Awesome Prometheus alerts +description: Collection of alert rules diff --git a/_data/rules.yml b/_data/rules.yml new file mode 100644 index 0000000..5035cf6 --- /dev/null +++ b/_data/rules.yml @@ -0,0 +1,286 @@ + +services: + - name: Prometheus + exporters: + - rules: + - name: Exporter down + description: Prometheus exporter down + query: 'up{} == 0' + severity: warning + + - name: Host + exporters: + - name: node-exporter + rules: + - name: Out of memory + description: Node memory is filling up (< 10% left) + query: '(node_memory_MemFree{} + node_memory_Cached{} + node_memory_Buffers{}) / node_memory_MemTotal{} * 100 < 10' + severity: warning + - name: Unusual network throughput in + description: Host network interfaces are probably receiving too much data (> 100 MB/s) + query: 'sum by (instance) (irate(node_network_receive_bytes{}[2m])) / 1024 / 1024 > 100' + severity: warning + - name: Unusual network throughput out + description: Host network interfaces are probably sending too much data (> 100 MB/s) + query: 'sum by (instance) (irate(node_network_transmit_bytes{}[2m])) / 1024 / 1024 > 100' + severity: warning + - name: Unusual disk read rate + description: Disk is probably reading too much data (> 50 MB/s) + query: 'sum by (instance) 
(irate(node_disk_bytes_read{}[2m])) / 1024 / 1024 > 50' + severity: warning + - name: Unusual disk write rate + description: Disk is probably writing too much data (> 50 MB/s) + query: 'sum by (instance) (irate(node_disk_bytes_written{}[2m])) / 1024 / 1024 > 50' + severity: warning + - name: Out of disk space + description: Disk is almost full (< 10% left) + query: 'node_filesystem_free{mountpoint ="/rootfs"} / node_filesystem_size{mountpoint ="/rootfs"} * 100 < 10' + severity: warning + - name: Out of inodes + description: Disk is almost running out of available inodes (< 10% left) + query: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100 < 10' + severity: warning + - name: Unusual disk read latency + description: Disk latency is growing (read operations > 100ms) + query: 'rate(node_disk_read_time_ms{}[1m]) / rate(node_disk_reads_completed{}[1m]) > 100' + severity: warning + - name: Unusual disk write latency + description: Disk latency is growing (write operations > 100ms) + query: 'rate(node_disk_write_time_ms{}[1m]) / rate(node_disk_writes_completed{}[1m]) > 100' + severity: warning + - name: CPU load + description: CPU load (15m) is high (> 75%) + query: 'avg by (instance) (sum by (cpu) (rate(node_cpu{mode!="idle"}[5m]))) * 100 > 75' + severity: warning + - name: Context switching + description: Context switching is growing on node (> 1000 / s) + query: 'rate(node_context_switches{}[5m]) > 1000' + severity: warning + + - name: Docker containers + exporters: + - name: cAdvisor + doc_url: https://github.com/google/cadvisor + rules: + - name: Container killed + description: A container has disappeared + query: 'time() - container_last_seen{} > 60' + severity: warning + + - name: Nginx + exporters: + - name: nginx-lua-prometheus + doc_url: https://github.com/knyar/nginx-lua-prometheus + rules: + - name: HTTP errors 4xx + description: Too many HTTP requests with status 4xx (> 5%) + query: 
'sum(rate(nginx_http_requests_total{status=~"^4.."}[1m])) / sum(rate(nginx_http_requests_total{}[1m])) * 100 > 5' + severity: error + - name: HTTP errors 5xx + description: Too many HTTP requests with status 5xx (> 5%) + query: 'sum(rate(nginx_http_requests_total{status=~"^5.."}[1m])) / sum(rate(nginx_http_requests_total{}[1m])) * 100 > 5' + severity: error + + - name: RabbitMQ + exporters: + - name: kbudde/rabbitmq-exporter + doc_url: https://github.com/kbudde/rabbitmq_exporter + rules: + - name: Rabbitmq down + description: RabbitMQ node down + query: 'rabbitmq_up{} == 0' + severity: error + - name: Cluster down + description: Less than 3 nodes running in RabbitMQ cluster + query: 'rabbitmq_running{} < 3' + severity: error + - name: Cluster partition + description: Cluster partition + query: 'rabbitmq_partitions{} > 0' + severity: error + - name: Out of memory + description: Memory available for RabbitMQ is low (< 10%) + query: 'rabbitmq_node_mem_used{} / rabbitmq_node_mem_limit{} * 100 > 90' + severity: warning + - name: Too many connections + description: RabbitMQ instance has too many connections (> 1000) + query: 'rabbitmq_connectionsTotal{} > 1000' + severity: warning + - name: Dead letter queue filling up + description: Dead letter queue is filling up (> 10 msgs) + query: 'rabbitmq_queue_messages{queue="my-dead-letter-queue"} > 10' + severity: error + - name: Too many messages in queue + description: Queue is filling up (> 1000 msgs) + query: 'rabbitmq_queue_messages_ready{queue="my-queue"} > 1000' + severity: warning + - name: Slow queue consuming + description: Queue messages are consumed slowly (> 60s) + query: 'time() - rabbitmq_queue_head_message_timestamp{queue="my-queue"} > 60' + severity: warning + - name: No consumer + description: Queue has no consumer + query: 'rabbitmq_queue_consumers{} == 0' + severity: error + - name: Too many consumers + description: Queue should have only 1 consumer + query: 'rabbitmq_queue_consumers{} > 1' + severity: 
error + - name: Inactive exchange + description: Exchange receives fewer than 5 msgs per second + query: 'rate(rabbitmq_exchange_messages_published_in_total{exchange="my-exchange"}[1m]) < 5' + severity: warning + + - name: MySQL + exporters: + - name: prometheus/mysqld_exporter + doc_url: https://github.com/prometheus/mysqld_exporter + rules: + + - name: PostgreSQL + exporters: + - name: wrouesnel/postgres_exporter + doc_url: https://github.com/wrouesnel/postgres_exporter/ + rules: + - name: PostgreSQL down + description: PostgreSQL instance is down + query: 'pg_up{} == 0' + severity: error + - name: Replication lag + description: PostgreSQL replication lag is going up (> 10s) + query: 'pg_replication_lag{} > 10' + severity: warning + - name: Table not vacuumed + description: Table has not been vacuumed for 24 hours + query: 'time() - pg_stat_user_tables_last_autovacuum{} > 60 * 60 * 24' + severity: warning + - name: Table not analyzed + description: Table has not been analyzed for 24 hours + query: 'time() - pg_stat_user_tables_last_autoanalyze{} > 60 * 60 * 24' + severity: warning + - name: Too many connections + description: PostgreSQL instance has too many connections + query: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) > 100' + severity: warning + - name: Not enough connections + description: PostgreSQL instance should have more connections (> 5) + query: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) < 5' + severity: warning + - name: Dead locks + description: PostgreSQL has dead-locks + query: 'rate(pg_stat_database_deadlocks{}[1m]) > 0' + severity: warning + + - name: Redis + exporters: + - name: oliver006/redis_exporter + doc_url: https://github.com/oliver006/redis_exporter + rules: + - name: Redis down + description: Redis instance is down + query: 'redis_up{} == 0' + severity: error + - name: Missing backup + description: Redis has not been backed up for 24 hours + query: 'time() - 
redis_rdb_last_save_timestamp_seconds{} > 60 * 60 * 24' + severity: error + - name: Out of memory + description: Redis is running out of memory (> 90%) + query: 'redis_memory_used_bytes{} / redis_total_system_memory_bytes{} * 100 > 90' + severity: warning + - name: Replication broken + description: Redis instance lost a slave + query: 'delta(redis_connected_slaves{}[1m]) < 0' + severity: error + - name: Too many connections + description: Redis instance has too many connections + query: 'redis_connected_clients{} > 100' + severity: warning + - name: Not enough connections + description: Redis instance should have more connections (> 5) + query: 'redis_connected_clients{} < 5' + severity: warning + - name: Rejected connections + description: Some connections to Redis have been rejected + query: 'increase(redis_rejected_connections_total{}[1m]) > 0' + severity: error + + - name: MongoDB + exporters: + - name: dcu/mongodb_exporter + doc_url: https://github.com/dcu/mongodb_exporter + rules: + + - name: Elasticsearch + exporters: + - name: justwatchcom/elasticsearch_exporter + doc_url: https://github.com/justwatchcom/elasticsearch_exporter + rules: + + - name: Apache + exporters: + - name: Lusitaniae/apache_exporter + doc_url: https://github.com/Lusitaniae/apache_exporter + rules: + + - name: HaProxy + exporters: + - name: prometheus/haproxy_exporter + doc_url: https://github.com/prometheus/haproxy_exporter + rules: + + - name: Traefik + exporters: + - rules: + + - name: PHP-FPM + exporters: + - name: bakins/php-fpm-exporter + doc_url: https://github.com/bakins/php-fpm-exporter + rules: + + - name: Kubernetes + exporters: + - rules: + + - name: Nomad + exporters: + - name: samber/prometheus-nomad-exporter + doc_url: https://github.com/samber/prometheus-nomad-exporter + rules: + + - name: Consul + exporters: + - name: prometheus/consul_exporter + doc_url: https://github.com/prometheus/consul_exporter + rules: + + - name: Etcd + exporters: + - rules: + + - name: Zookeeper 
+ exporters: + - name: cloudflare/kafka_zookeeper_exporter + doc_url: https://github.com/cloudflare/kafka_zookeeper_exporter + rules: + + - name: Kafka + exporters: + - name: danielqsj/kafka_exporter + doc_url: https://github.com/danielqsj/kafka_exporter + rules: + + - name: Linkerd + exporters: + - rules: + + - name: Istio + exporters: + - rules: + + - name: Blackbox + exporters: + - name: prometheus/blackbox_exporter + doc_url: https://github.com/prometheus/blackbox_exporter + rules: diff --git a/_layouts/default.html b/_layouts/default.html new file mode 100644 index 0000000..f5a0176 --- /dev/null +++ b/_layouts/default.html @@ -0,0 +1,54 @@ + + + + + {% if site.google_analytics %} + + + {% endif %} + + +{% seo %} + + + + + + + Skip to the content. + + + +
+ {{ content }} + + +
+ + diff --git a/alertmanager.md b/alertmanager.md new file mode 100644 index 0000000..d049bb4 --- /dev/null +++ b/alertmanager.md @@ -0,0 +1,56 @@ + +

+ AlertManager configuration +

+ +{% highlight yaml %} +# alertmanager.yml + +route: + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This way ensures that you get multiple alerts for the same group that start + # firing shortly after another are batched together on the first + # notification. + group_wait: 10s + + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend them. + repeat_interval: 30m + + # A default receiver + receiver: "slack" + + # All the above attributes are inherited by all child routes and can be + # overwritten on each. + routes: + - receiver: "slack" + group_wait: 10s + match_re: + severity: error|warning + continue: true + + - receiver: "sms" + group_wait: 10s + match_re: + severity: error + continue: true + +receivers: + - name: "slack" + slack_configs: + - api_url: 'https://hooks.slack.com/services/XXXXXXXXX/XXXXXXXXX/xxxxxxxxxxxxxxxxxxxxxxxxxxx' + send_resolved: true + channel: 'monitoring' + text: "{{ range .Alerts }} {{ .Annotations.summary }}\n{{ .Annotations.description }}\n{{ end }}" + + - name: "sms" + webhook_configs: + - url: http://a.b.c:8080/send/sms + send_resolved: true + +{% endhighlight %} diff --git a/assets/prometheus-logo.png b/assets/prometheus-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..77721090ce7f8d9a7a3be566aac24e87295281fb GIT binary patch literal 17587 zcmY)V1CS<7&^8K>?b)%ddu-dEI?_$?<64}%Q@1Ox;xDIuZ=1O!a@PnLj!`1e#*q6Gc-0(Mdq7XqrD#y$JjfVP*= zZ~_8?Mg8vv2FlFF{0GpmP*!(VmzCi%va_KxFt#%^p>wyf|Az(w;&JEtC)$`e8xXqN zSlc>rx$_eL4}$BT{GXbhnDBof&Q`p{>axEHh3ymZdU`iEH##?FIy*-*dPYu8PI?9=dL|~?e+XJ94_jvgcUoH~lK(aG z|Fa`v;$-A#Vef2VXG{2>T?0cq7iV5#;{S;L@AbcaI$N0j|Cwx^{!du{0;K=1hn|s+ zf&Ty6{x_88KP{KMqlL*o=Kt*TG4lKm^8cgzKRi72|B?T{ocZ5K|3~{TR6ZCU`v03Y 
zJ{XyP3JoA20U${cL1lN~D;-FE?V-7Cwd%8LW!jf*peR!rME#6y*(Axb0&HuzzT{b- zk}!liXb2F}Lr6npi85#~Kszw^Dg>5&0zgZ(W<~F(XRU)Gr#J}^APcp*Uiz}#P214W z(ZJl$@PN57-A7H8P$qFf;P<#P!+G{MH42enDi0Lcfmk(b^` zro@Wem?8c@P&wwRt)YU31ET|%>erML;%rv4Hi`;7ISdE|PfWOc6^m2ee=saKB#34l zU}`!V=5hh6i*{IV)5-FDbLRPG*r6%INq@@WHWJTke5QR3w`k*BX3SR)ef_H^C$mvT;v%3 z%A)Nyt!#7E;?xIG-Wgv^%Y%Av)szTSq-79kbGGklZG-_P5a<)UmNSfy=swFoz7J>lf z2Zei#0@47sHVz_pV29p)?TI0Pq8kqd)dz_G7{v)!#J>4DdxJ90tL~JAPXU)d2WfeT9hg z^^%06fvizAx1qD1A2IIwag2ab|G~me=CPsBn$Vp->M)Qd@=RgM`e+-q88jd}Je=vA zoXq=gLcOl(=lg+CA`uRYVfyx8Qo3i2cdAdI$+GLXQ#yzOdawLm=~7KHQAMI`LxQ~_ zv1FqLHvmq1Z?c~@45_7fzQf)5ZYt z9!0~0S@r);q9K7FsNeJ944yNc1u{``vHRZTBZ~EvC7}a0i;XHQ(I%mMp{T!zmipIG zkj>g;Tw0ej^<>W4`U*=f5gyD-0>9Po>=icZ{cZ*(sA(<;WOy8o<^L<9PkN(#y`-*6 zMgtK{JBuCJ*(U=A78X$@GgCl)&z5^vv*!*V6px-9n~hPn?=s zoIyLA_DLq0!UMC)?-twRXQAI<3BQ-egx1iE|JUuL0RifQ(<%oc2SxTGaiOqxFv^&W zG908l88W^JP-yU9MJ?=du2A_(fl%1S#%o^|lI_*lsRHeP@+m%ZSc>13&jU1#e4-geMyd9nyzIWznJ z2GS>x2NvEu7=Z&#*KhG;KT#KHfHKklU@t`?7254PRb^Uf^TWVF5|4T9+r(|lRR#qW za}+PGp2@ac8xDHz9;77=XXLd5)s)Jz#b>A2S(hj`-HwswG5tTK1O?Hm;#gxt1&dV2 zc%Y((M~zbl?SRrFK#4@*A5^C=a2l`Ka%n8kxj&;rDP>k(Y!&(qNh<25egz+7kbMe* z5rQe#hypE`ErUsW+?%1{{xJN2{FX5Vt}s14&*X*$cZmr%n<3tD7}5$2#uQq|#^<`& zqy{p`UKV_qQKfHRvNE4R2i53gdjhQEKf@?U+#G)W=ENm%298X4nK)+3(#;%OKThsC zClR+5(g;b9WV(g}S;1jwzNDxQ>!+!lHVF zKyqTHfgY7=FnnTsN|)s5j1ITs&VglcQEa6D>nu$lT^v5so*yJAurm!;mtHo&jTLm- zOlgcy*ztVo8a`&2mId{!mYWsvE%aF&=sY?VqRl^MmJhP0QCeXmIHLVrX22 zFOOfqu+PG(wQGGTIS67TWDJE9FyZM&u5Y6Ey$LZ6418iFl7?P?lw{JjE^VpRL8Pu$zvO__Qv2gBvVGYP~VldkXW; zrSQV~*6K_~2EkVi2U#NvKWb;W%>Hzgv4oBD7uRKFZB&gX3yq`f#IVzg>ch5#ivq>- zvCZ#S27!an{6f)+^LxEq9gn%mw2EquhuloE06^r;)dUThsHnF#j$9cTmX+NK+;X+U z7>&>y((A{d&GF6iuz)5=@b-Sr+Uv;9o2W2>eAvz${1-9E21G5z$^vWL5bIWIHDWBw z5<-ESNhN?cTZh?|$DuAN-QHfqxGATzKLwD@Qt~;9f&)DXC>6ftz?DHWVYW`s*EWc2 zsosx_Oq1FHLI|B1M=C(r{8qPk#HapEdQ5$90294KnN&)IXxiGN zmmNk;0ZF6fuH_hS6fmr4_FA8OcpeeRBTo6PkZW^hNu-C^exq*Vr;b8acvBZSS5G3D z*|$f7udjo`cEiym!YHQ@|7+_VTl?)Pkche@m`OGO2hf**Yz-p=EiU02#%0O%mlNbH 
z=P#j^jXJnf&=X`xs$V_W?d0I@;SSqH8YPE-JM&l%KSW|y#w1le$Pm+(tOMqhsYaEq zGegQWXO4|cl;NyPEjJ$f4neaFbe9n!EpWXWc;5lZGw{N%u0Av88*rN)_D-fs!HxaY z_v(~kRL-#$unLoVnu_l^@pHAi>)59eSzct2HR2Z1=*T{%4%ro1dFiP;t!l0l+)h7; zGD10P0dioU#SD)oP=4VT%p3k-Q{Dod8NK1Wb0EuNFv0`Yq6ozxi*;I^K zAS^Ks*fcnNEm<+oK~KP8QKKv02iNT9m#kfy{EF}KnfU_Hy>)|mJue9!!p{={x~{E- zGRaASQ<*GWbd!o_<@WB~@FWgPy~3%Z{f&)JnkIC%4kIbS(|I1q(QUQZel7 z%J|3)dDz($j++zCr-0ao_ufGdlDxkP9~m_S-Mj5acqvL)b3pY+(~0@BNX|E<46wmI z^;-vz|K)foquAU#hBaCRH}ME!hs*Sx_4S5(3KZ|i(UL;IL{?im;!tKmIj*y>O4H)n zdoyW(3Uek;7#6jgDqv}JSjc0PG9lgA2A4lI8;v$G7h-gA7tcdzyxi8m#FsxRdCvLj zUU?Pqa|z3MVLy_^D{8b)?wygd%+~RJvC<3-jAr3D+t4%J5*4pONm<5HS8PAoQkY2v zol?ZRwzJs1l|s=w!W8hq`(pR!Iep(}N@~ywG-9nJ?FpqbPfH(jhFn%qS{YU}Xm%UD zPb4mte)*jLT}VRuvm5V}rlW_Ar>+dTh+J4XrKVR~F}H3XiENSAOtg5una)f}{qc3v zv9*m69F|uje`Vf20z$VQY_ss`RHAt|N_1P)wGyiIjMsVny5-V$mU<*8%x)yAH&p+S z8q;xcXZL!I%az6FCQ|z+6euMOTH`H?`FWALn^UM3Zj95gAJPcl>x=}6kngPGB&RbB zUBwK0?pB#(b)S#Bg#lpVk)}ZclRIE+Nn$vKew#6|vFQ9kMk17w70d2o^#flO;~nN` zB+RfJxk&GAl z#=pD;XBR0)7eehMRh{>zl9&W#TYjsZF|F+@(CPIwMtAHk;&xWgQqUiemHwoPK2+M= z^_^=)KFDUrHy+-6qvWt`FyO(r=UTA8BF-+6TPgpCXyBU+485kPegR1z1YXv-|2^r2 zQ)6~X5*?e))C962>e<`Yes|8^?n^(eOSXr{_%Avv-k7s+-x*koU4OA3G-Bs%$XD(_ z5Wh0}FzATG(i8@oM$DA>K-|YR&I}h1Vt6Q|vT$YVZYvBG#Y<9W4*3_emuo)s9S}F+ z{z_f%>d_xvgJfYzZne5YX)EAqMR0RfIPA9@39o^aTWWe{^N_pT7fDN^jh9@pn!)1B z{+F^%69!+A5DPVez?}JA#J;92p>8BqP4!eUCSVZ8`%{u7QN-_A7M?2OkjmLL+YsC7 z7k6@O)99h|zYVwV`1|T?damm{E(`3iI$}I(+oua5LrN?HI3;KZCu) z+Ivr!tO0FaL+ol(D{M7+XDlq2K@tcNWhpo6TyP!4zOz0FXyG}+~j%W2h`+-Jwkt^0YH%-wz zf4A%lV$E@InVa&^VZ){?U?%z0pa2?H;3&N+x}N7lS61Eaa?6GXn`L6X)P3+`1L`DT zca!cM-do4ga+(b9bv^t+xZZ}O20~{!eaH8JicJldw|qdG03^&J8nmTVUN55%O^jwZ=A|Kl`&l35~D(AM_qp&Q0 zL~(_4aoWX#2H3u@~yd_=E={vmemo-t**^MVw+zTwmVSbKy=J!)E$Qok4wvYF@0c!ZR*%B z9|_{PDHHxF@+FahExmNo$lc%+4yw{pN6P%Q_QbUvtZ~nQMs|S$GKwuACz+dj;<$}h zzYiF>G8RnwysD+gJmy<7RmAV$t&-3_5ixDFtkdP49?qGHiF!G%)vH zM*<38@lY59jHT0r6;&m0xzFJL7PS@!4+y>lTM31W)`FNu770qMT%ok804nw2S8^%s 
zq~NZ_87PhGgHUobvOMq>P*|7RbHL;{;P4V)jKrAnCra5>(y|}9%M0EOsJ3$}_Oi?% z(pAxD>I*O6uEmHGXFh2E(9i*7WabAyoB#Sc4X-6MRqVL6DCa?6kBEDeyRa~iYT&RY zQWk_d-ijuAyIr+Qr_AuZxi6@k;DX{lUW;!8s-WJ7%|b~>AXuqC+~lz5IvwtJ>L1wn zD$N9KC1EX2a>#0;{voW&#k1Pds@Z(BJ_!4{MfI&X&UG!_tTl;HgN_X{L4(bZfwc@R zEVOylAPaBJ6po8S&$P_j=5brr_8&)IKM2Q945%qi_jm5Dz5lGxb68g1rU=y}$EN0! zi$FgwjD|j(CE?h$HXN8Mj?)|#*2pcHIuYUX`T08M_HzQF;KKwR?H!@(u5;8bz|za! zQuLC|#pl&s-1j(T?WctsSJvw$#bsYT;4-P*U~HH@;_#rX25-eKWK8bOWb@|9TxGqT zi7LuyO!sf)v2AMldpZU>n+EFn=$;J+= zkuDhQLcVNr(`k|)@Ty5`yP&B-@3#@Q*9N%k40?z>hT~#Xjoau7}(EzJ8mWc<{qTYUyGogJ#&POx6gnN5vdcIyFPLB=6Rn;)P{+qgGe#g-dlc-B#RWQWz-k5J6sz2=?EtS-B;)4E5OzrG z$%A4u$~PA;%ppHN`2DQC`fxCh}b=DmvVbUIz#gy`vmX+ z6t}SNhqVTL+IFoZGO?lTz|y__54@s3C0mS+_ms4_JZcEd)8htDMTXQj;bu%bPk*7e^Ce zJNKjGbTqDpWGw67H*=jOEqY%SJyour9R}s~l$^4Xv1M{ESxEH!i9Q-=skWSp684BC z79*T2CN=REW!w5&qpvHMKb8b=xYXKMt0Ke<(+Ze@(z-e|nil{v(_U*4ecKWD(1uWB zZ_CKUjC0)d+C`=xB`!j!62Z#Di`h{R9E<92d0D;*ja&u<(H}`_(V) zyD{n9-v^_vO3K=ahx24+LFdlhMD5ynl5#WX?yKW-`*S|Bq8U^;>*@I=6`VFMkWGG{ z-%86P(q=G_(~)=3@O6`+xZbF8JdO|n7|ONIC{mcum`FSUJ7vk3y3WLN<)DgxLU7O6SHh5%TvZOe^*#$sh>$ z!b@vlU*L90)Wh`$Qy%KKdp=MzJzWOZm#5RsW-QWsuyL-W8}5g@RW14KG$XdFW!rH= zRc-;S7&)gtz2Cnd%jZuc5EYLa4U78)X%r9o+k}hl4>(A0x4=C!zOo0%e>i`dMG27i zQ6U+*`sBKB%1OCQfLpV{E0xvLg+xk|lc5f1s6`a@?RX5<&2<=#9VZ9yB5MZm;z>&(ZiV3b$urM&KCV zs^%5ztsMhY6~ewwBoX@gPbS3crFLB|-;3sNk)jAlv?`8es|^lftY5MK#ua&AnVl+CK8 zoQ7py-~G3uYtQ$EX=iWpZ|K4`)a&DSSdu$TW~+*mi^R%%wxc+AcDsGL6S(%XzN#D# z)vm2iiCuXc8a`?l$gJ4tti=5l;%;@9B@c-PlrT@u>f5kiji235MQ%6ukT@kR0s3u> zz$#;H+nmm;ZAKV9pOR*6pVxNFIB}Z_cWG%iZbN}i16)c@LNN!0dNu86b~9Aev@4WL zryqG=E5lLJ&sBb%$~li6G-8lF8b7H>J4~70u9=AqOT5)(#sfa`)miF}0;);q%uWUb z#+tSlS|csU<8i8P8ngb?CF+HAL;aA)e~Fd>W~^i2Z!I4q2tEd*G4l7_Mc$ZE`z6wH zTr1|4T3|{sm26uqn|rCKEp+MPdTvyrQ5JGIl3nozDs)b2+nzTlc4ZdEvx$a$8jpiH z8Uzu-Bian#dp-8m>Ep7YKUST4`yD|z6BunvuT)o=C8lB6ajTt=1c%IDx!?gnzyj(` z3lB8BE-`2K?JJOnlGa(IO7UH$l8W9H9Z!*8{d!eAMQEWp9BOAwKHxZCN-x|dqpWV# zXR5moePjWGvU=W}9Xe42XIRX~mIlY*L~`Cq??tv}Z$;5Fm)Vz9dVUt%!#*~`Ps{BM 
zi0=?IV}8Ne{WG|4;8sR&ZScY~oD}oF(%lR`*5$X6r^*nM6TO}6_cN+zZ;-+kN=wWo z5Xj;19hTy?e0wsvTF(k-!qi!1QLD_|)n1`8Ro#ZalLB5DPcQm7Wn3VSF2gz8N4Glp z&Xck0DP%CfXi6a71RBt~B!(!! zsmp9V+5i>C&;z*kbtUbCQgpX3ZMmL$=Pm!rOMD4I_3@o8u}-WIFuRoWPrmThw>m94 z5Bd}gE5J|a{z=+cN$!n-O4;De=L>I=g_O$Wj`Igs#Za7K7f!SoT?K^x#B4n z%WbnbFjE5!)Ups2B}$z(u)RtNHKHp=+-Gv(=OK|B7BU0%7aMgWQ*O)q0@bd1Vr!sOUUwS(-|o=OR79b?9RgTDv~x9T+k`AZk4^@1wW?X=Ha!p zQMvC;%!@3%P!S_!O!6|WpA=qcTXI45t+5&ux*MOPoe!EjdW;DD z-;0#!U=`)ppR-DCe6-nq!bPlB=n;lG;S@^^IzrIs61{>ZL)0r*N3&>dxCi2n9HJw& z$Zn{28hz;`zbKm{Ctzu%Hq=#39gdjcJQ=%hi&sZ=boF%m^OEi@F`@%k@`B=nS7)s}@}?sOiC?Le5ZR`fkxH zleW6uzdXWrHtfF(?9B80p34<9zH>V)7K*;j7r=FS9cz`MqD?g?L$(meU!C&CLkyV& zq9y3pUHgfBO&farQDv(6VHg~!*7SCzdf=Je@S665%O3!eo)psPFM(Hxb+e&*gNeD! zr%`pKKlgFCzmeKP&eA1o(OLA1G}@ZLKQ6h`!VzVat84aM1X>{0a&kR5CAA8xT6CXb zc~?EHF<}ht6$$yoFd;2YQ1AALA^Ktw8vR7#DShX&^jHcN4Vo zVqR_GYAD{qX3Q@m$C;>J9&!oygHm>&<7L-%kAvX{ApBC{h(}-E14TjrL2vw?&DB9xHv}45QVBr zj&bt>rr;D5iF_H>^4$25^PWazoRKj!J1Zykr3D4f!am0JFgnViWBd1Br~=`%o--+jLa@wb_(dR|FOeUO9ZFkt) zKIHjR!e^Kwg?5nfJ@oSbqG9MwUOb}9dg!`%RT%6ZE#7WhZ??N_=&T&=rKZpVjyqga zoB9h_*|Ti~-9TOm=Y=WhD76I!!`9V7u4iqzd(a^#$M+50`i^=KVRV1KNGSuVwS)lm zN+0YOrAli6Ny67JgjXYaFiRu&OtJRgCx~kmv|s*877C+7A5U!UnM2{?FVqr?M$LQd z&iha@XlF6cFBaG$3Dz2ps!OHlELIA@l^fSmbQbHwHDK(+n*|Dia=wm@KaG_DAIK}b zH$MVXORT@AIOZgaq4-pnzk?mNx{yn^tVUfIaX3=8LVP@FPbq^MD%_0NB^R4>Lvc=e~KC(!?Vq)^{|_d`Mx7 zPsDDKoD-?4;Rj48ABIy>&2qN+@3fG2J%eM2mIw?ASXoTEfq6DZj=NAZPyFFSRmC+6 z(%L|+c4TeA#wk#<&J7rxNlt|f?vK^ZVfV%q-SEgbA|n$^n0r$krLFJ^2&ScIt$ zr^dR?oiI*t0y->(52^qqZ>K066g5#;CnSk0FDo&|;-ieCC@ia4No9BJ-)TLJHRyZU zp^Dv$&H-JGWU5;~n(@j#&w4zDn4E+kTc;XEFv+&Ua=N9mqSpQVattfLvow&wOhDR! z0jh0ug;_w)^{>{7P`Hr70`27RPQ++{ENFD;_I)ON6%k>#FBP=61`*2-_nXyp&MJ!MiGy&_eae?vefZh7a zYZM=}^}CR={@@^@bmoV`>r#d78q3m1gPly$-+prA`nDsDO0WPTvSN>pfk#Su;M@I$ z*zs~!-$@{Gg(m!W!CapW1qV-2)sedL=U%H>ZUiKQa;+A^L^z7rxTRMC5uS=2oS<9& zP(Q4%7ReE!4%iSmyxnh)sOIB>WprBE_~>w|)2qMbVXW(7jOL~fg6m!RSGUoBT{QZD zY=nJ?KogsGd&yB2wJ{HY5bP7K#|(WT!&7Q@>4R>(4biqEL;^aN^A2SgIo{j?!H8B! 
zCYU!7&KTO;apw#0!*~XS)g&_mbm9~g1{Rfd*f&d#@5d8yr*OUrcJM!@a6Ps_D=d~8 z6E>RgiI?)(^!=4fOZ(fT=k(6L8WR)gP>EO(cmI%O_OK81eGwfOfd3M0oZYts5Igb^ zvljUf2+$WqEBrY!CVA4`KWsK1Uma%aFYuw#kfdUc(qP(#c?jj!tMrQXp9>X{$j}m6kVs1WpfkYx-w7#dBsYZ!z3=^I;nJ ztgx6_fL*iW0)#pOd@Nyt^_6Bh~a~d8L>gYjwpM z7RFwkMMS#HUx#;xMkSCk5xB4hj1#?%D_?KAcCSxrVu^)7lj)%Q81C|W%(*a;_qrwA zO~AzH>B&Gjhb?TkcsR~{vM40;Y?=2;nP>$_biBfOBqZ=51ws;kg!)QYBT5f<7J1BJ z=8ZP>hX}jz`mj_va8}T={qg*(SN5 z5XP2t>Ll`~%*zXomV7&?aV*xk7HoPt!1lQ~yKGVbn2ih+nGxF#>Ve~;BkC5M@QmT1 z=W@o4zR`MI|*C8 z3=YdjaS}f-dDb+c@o|*lR6?n*0QhH?p?U^evCgOAwTFQVlAB1)#Q?luXQ^Y6p2yO~ zo7|EbE{5VdqHAYHr`vR`d%BLI$YBq=Y-mVeVi1X-@T-Ng3|?diIZ?&9ktSqa1Ht9Iwk8P$5!?+)eeZbg8#_!@g%}qh>LfT4K|K z4ND+coVVJQzV+3*i9|LhruxcC%!gVyh_PkOq)OldNa72<--7gKIA8{LF$wXBrdmi? zkofP6$pXQB{VwP>y+Q%G%6U=pLO>c&QI55!>GA%HW|l=?p^TY>LYYBh&@vi{M63uE z^o9qs1|9_@=4hm)14m1j$vh ziOX!*=mKfGwWnmNMpMU;u(5s<30Xh$qXWKgbXe^WwYIbZ9|1{xT9mLzdhtTPuGQ~- z4@^n{la}g!Rvn=lz;hG{J#K@a`gSP16(?LGW*sfH&Z`5`dAlwP)LE&#a+8=cmJ6V{ zYAzn#xPH$X%xmd)jH)W2sUsFl-gA8FsA!WlE14b)yr)_- zmHpY(T8~=`vkgS7Ln&kpi)WQJqqbEi_%>X68PGY`Df5?y0M<06Z?su(59pjmOcoIx|CPS`s#s5 z9>trVDnY(Z;&>hRa;I`2=)OM7TQtA9Uqv12by5?GC7gN2_mGIcJJp2RztLLL0RZid z>qO%X9FfAd%uzhg;NPQmg zq~;n*0@3{#!&=yMmDZl8LF-xcT^_48e(rXHGb(=|8@0iNAr-~z`$g%uRNeX#J`O`H z)6v%y@M;$dz6wM(2Cm3OFZ^B;Jl+W>5t+VVbFHfq*-so>NX~q0zdovD{otE zXt-HNPJaU$_8i;$7j0pRDU!nAeViX}a=+AwkGX%EqCTg=A1R8E^s`a19g$n~I_mdw zt?*;4x4kRTEV(tq#hy!zuwGw5gWRLhej|6^e?GeDHmn=9t<8-wj9+*r8QZoPtD2fGMVMb~+}bJaz5p+YHFqZa z3Ok;%2jA+(9vR%49?xJUC_NxzgOG~I+A-bj3fAbJSW@d)^?5m)00C}m^-`*hEF@04 zu<@MRvTi8!C9l1bb6#4Ab_FMc8wKluAieln;ASDWF{#ZxPP~VFSSKZ;gn>f+m=KgC z@qI!ayh)CZ(zkH5t#uK|oEOr1jNlWkDI1qd#&u&MMVhie6@8+){XSP4|Isb%6E)Z3 zG|j(Ujpp0$6U4P6Wy3=J>8GK9a_$*sG#TC}O9mrc_NTn<_k26JBKz(TL!}VeU)8+a zM(BRtexf`vsPu|k+;Y52c9-5DQsr>3|7FP==xr8~f$Jo$$vZc7K@gg7Zq}G2;ZYv# z{Nw~8LLdGb@2J$$sviIof(WXR6t(dgj z?<4ou+5QXj;POtamQ(y7U6(U@UDSbuA0~)HeSViGua2S%Qg9B#A3xz$ZsrRnz^1sc 
ziX^N^@2Fs1+5Q8N4h02+re8zWx0*ugcTPY1`};%qXz3)k)NcDV{}nt;RW_fHJp&Dj()CnC6{C4}l60h(sEDet_l6;Ha@Qy8iP*&yQ+;U+8)t z1C;5}WfDKC@DwxH)}nzpaYd*H>8__EdNW&j!<;J4CxShwI_WL*L{yZwx(s5`7DySg zh;Z;wGyRE^6jZYsz{mv8L(vW*xYKy}`|vCVG&Aw!*yc87=Sa&%=?KD%r;~>N5C>j< zN6(Hpk4Ak6QdT=`JWzK!wu^)ltG~Vr97O#k- zbF&0M!{N;7=vaiI=H1$JCf1!>&F*; zw6&2;2&yg*fDn@ONRV-m>v?cKReX;6ceoDVrn0uRkID23n!EkI?QD5`ohPPLz3YEVZ~lP3H?A>K5;8Cn8Q zkGDv?FIxl_dV+yTHlhkZSAYK_VsoOk-PAehYVwN$UD&SkhK6yXWfR^3r__*DoTpRi z_90HLK*Xv5C7-PXG!RODzByE@67}v3fg7C(HuYS=(WGC+x+I~d<)9HLq;Sw7aV30mVLC5_b%P8d0M?1fG4}?u)d1!iGn4Q=Zr6whrBJ)e z*Pcdp_DOUVG8&K|r2>bG?DO7Dzc@I6Zwxp{G=L`9Nj`%^RU0a{Fm&o2Ub_Ru+d;!sc4z2-m#zivQg*<6%it zv+JqqpFpA+&4Oj4U1K?hWK3quzvwKq-B(3|PIlj1{&--g`W?>jO+Kn1stw3HrBU=h zhx$&7!ScbJD=jF6ura(XBCZSz_}!UI^s|irDG?y7LL81A}646c-szohn5SzPL75U{T+51E0!$(=qy} zJmBsrZa0Ul1WKK$`Ytf8axnpF%@=bD-fgeaALyuZ=0Ei6PBdU8`VwD)$Jj=PZ~8`s z@X6+d%A*F%KPO)LI}33?8Yb&Jg@&Hxj?QS>47LVth?&*J4eP%Jln%&JEDVE z!s2Qw*o5CW=)DqF1dP}O2(0LK-RmcaeumD~s?KWHR45@ioaXx}OJl^nLq@ecMi)H&754+{CnVRk!!>VzEqB$} z<|B13__(A8l8w}!pO&hY@8d$`7M@21Ah1#|{|64WDe)f;rq_Mg6)vNKpy-nK`F8%F zHy+@y0Y+L0Fk>$VKfJoYIK|7CoooJ|d$GSsKS1_`z>PxL zibvohddGtlrf9#)q^g^uRF-es)Z1T5wOWh*L{W(xsf;8oIttS3_CDxSP_ofKM1i&`X+Z%lR-u_O<`z z!CBEJ-sc*A3Jn9l2fOgB`Y%y0XTo_zC5>OB;kE@k2V7IXv-=BTm7Z5rx=t-J>amEu z8g{n9oc`I`U;td+uWj%(F2}^SLAY=UZ-rGayQ(L|fVL?Dv7NgxoRNW%_I-1^RkCLC zd-V11LK2D_J;hWjyh*FwkSWaKkU%dro}XR^Re*Obn7CeObw7|4U{0;`8M3z{`a&-; z7f}oh|I2-0h~j5pqqV$v>6hUx&5;7lGQXO~H)9-3ceYyP_!I3kFXE#)YgUO$c3Z1R ztZ3|Gpg8mi!dD{r#z_APsw;r2*VLKGPt3wFt?ewyk3Sp7k~$y`c=`33M%=pl@=-Jv z`CuEfedJd2cx|d|r@r4r)q1OZzw^KrY2#dea3$=Cr$ssz9nLosfx(DjRx%{_RrV2A zTH3!iJ|K4vqDG|O!1EiR(Tiv=DNz7g7CtFEW)4qGdKtW@2pX;Y1a$vGL6rdz9)JLe z!TI#3Y|t~Eqza+$1ps|a4b#0};H5OkGlrB@xp_XelKjOYp9aJR&g8WnIv^eEI^Y@x zM_FFt_bWp|=#%rcoNji^BZkwNoHl zfdOK%%)2C8{v%QtZhtKeN$FM%wWmQ+vxq)(7n7)vd>_; zaA1H-7Kdln+z}VW7{G%pq|(1hXX>#B6IgzTjA9DmsES~9kHL3_#eFmCQYp%y5nk~o|Fd2 zv@vZE@_ZiQmD|j;e*PMxSGpA(Nn3l+138l|hlZaP=2K$kPk^izh*~!8mio7~Yq84< 
zl6IM9-S1SbxSXP9_B>FHI(W1sVJmtb`gYnzJOGf*0rOB=Ox&!!&SU)^Q=`X*aEcc2zX^A3UO9%a_1oW>pPg$_ zx1|sABZmVHFo7;15oZW<|MxpYfx+_OxCT68@MrMPuU9r=%sny%5x@7d{D!#qg~F$h zuxYMD6B3u*R(^eXnyFGfbUp^(>?t(I-*r#OvAJF~hTUPUJja!`>_D zL^}+nT4Ugsi#wDlFKdmkBgIV>M(A@*WoK3f=fM6ZnPp3vrG>)`>iP8U7cr^yOE zAZ^-&IQky1;Z1^ZH^@p0`!mya{M1CFGIZM~(MqZT&@hB6DM7(fYYR1&@P`%uQU)HQY+a%TM#)Vs zu(DRlB3Mp#vs)EKhLV6G(^j{eFdL|MAyJnsJ~|@;N2LV`Ki4b`CsTepilV8wxr;Pp zmxk=~q__}!q3|&{3`5=6+30$IEcDr_64-?dd>kct(xTD;X{Bj?vt6&S4==g&s&QW)7Xu~^ z!xG0d)GWKfko${e)Y0@by)IIslGkNNGV4zHZn3$6H)-Wo@`dr7>DJp~$ExYK2Esbo zsc%=!lo7(%=SJy{)jbCxVK(%DR?L|HE&Vl|`z&g~)`?70$Zo+%?1*7~s zj(_DKiRHTrgatGq$yesoV0S7kOyKu04)ksY3{(i5hJYU1sJs0~`o}1;NGD%ew6e)C zqGg}4HYowf)amu+}fDOq4SKAKQ3jQ18v#hMuh)8zn5Ph`y7wz zWac#~*sxMD9Q}FWyOO(3j-6*sg;#biW!+IsbnTxyWo+L0?Cp1(Q!4TU?F9F(9D`>U zMfc1+bzx_AeVD}33brBxyp7bLMjfl*yoqeYU9tXkB$#TaX*wr?RoN!#?h23%02AOMt_WAw8p-^ls zZ9xHRXvD?idSe#Y^viG&P=<3!U8R<0+{7xH~_BGlv!uzIhbsBS!yZLjQt`Zf#YnG`c`~#c{JD{Y^l;9*S&3A+#tMtn_cW z;l=-vdB1)@p|cTjn!0PnUOpI*t2{ezS}5)yA5lG_ z5t6YJa4vbj*@OpUllp%G!~;A0YeTsM+taZWZ|^z?{)Je1$a1?QEFgV+N0lAX+q(HK zxcH&+*s$~Zx}R!?D4WX7*lGKF?JP5ZEcikE?yH}$a1(c(2$+<8rL~jMHBnE>cuP!Z zME5Tc_rff@1r~pKS1qIdlPL9AZbeIqwJJaX>Yq0luW{@C``XobClu14<}c9 zuUt8R?eNqR`ggx#8k}AGi>>1B$p>hH-#vTgIfH==27Y`PNK1gyff;}-h@XiQt|Rqt zR$YuOLYwTn=l}o$=Sf6CR4B|O97Upq;(G+F=%Rt|jmb{t95L#Txi>GWOz7D<*~0Hp zdM0ZI0~rkbh%q4d7q$gca1sKEaiFeHXsNk5#|XhbMaJ$yzd6mcRgtxhjO{^P&SG(| z^PfmmNrw)xiP@_XFKt1Y$P5ND82F#VK)(qPTMIwOP(tPII%Qqmn&n?aLcyu<_N##Y zkQ4jw5iA2hI1Ndx^bH-uw?1uuH{YYsOx6qrG8p(Dz`%gSClDGs8D`eaIpHpU)28d% zb$&d8jIM|V4XCV+JTQlcw?jubd&HAN|M;(qP!KN%f4(1G`yW7l2E+^oe(o6PqG;YG zGLmj2T;nG{7L2)tfy?f6L9J%U~dbf&U#042%Oxw95=Y6f3;{`RA=cqc5=>9vl?X9T1qHH0Iws z?@M&TmYJY~>S@G1xg0JQ%^7hc5g#g=tTPi;Wd3F_kio#u0RsajKqMZ)9*&5NQ26LS zmWZQ^PE_0ll{rpP_6P<(aK*p71#3wnMyWqoP~x2nX>uH*Lv`lVXd`z%d+6h@-${t3 zZT|0KWcPFEq)Zz#82G7TV89}njR|`oj=c=b2oQ*zYM4G zi*d+}xGKB6%r%373 +.center-image +{ + margin: 0 auto; + display: block; +} + + +![Prometheus logo](assets/prometheus-logo.png){: 
.center-image } + +

+ AlertManager configuration +

+ + + See here + + +

+ Prometheus alerting rules +

+ + \ No newline at end of file diff --git a/rules.md b/rules.md new file mode 100644 index 0000000..8604774 --- /dev/null +++ b/rules.md @@ -0,0 +1,105 @@ + + +{% highlight yaml %} +# prometheus.yml + +global: + scrape_interval: 15s + ... + +rule_files: + - 'alerts/*.yml' + +scrape_configs: + ... + +{% endhighlight %} + +{% highlight yaml %} +# alerts/example-redis.yml + +groups: + +- name: ExampleRedisGroup + rules: + - alert: ExampleRedisDown + expr: redis_up{} == 0 + for: 2m + labels: + severity: error + annotations: + summary: "Redis instance down" + description: "Whatever" + +{% endhighlight %} + +
    + {% for service in site.data.rules.services %} + {% assign serviceIndex = forloop.index %} + {% for exporter in service.exporters %} +
  • +

    + {{ serviceIndex }}. + {{ service.name }} + {% if exporter.name %} + : + {% if exporter.doc_url %} + + {{ exporter.name }} + + {% else %} + {{ exporter.name }} + {% endif %} + {% endif %} +

    + + {% assign nbrRules = exporter.rules | size %} + {% if nbrRules == 0 %} +{% highlight javascript %} +// @TODO +{% endhighlight %} + {% endif %} + +
      + {% for rule in exporter.rules %} + {% assign ruleIndex = forloop.index %} +
    • +

      + {{ serviceIndex }}.{{ ruleIndex }}. + {{ rule.name }} +

      +
      + {{ rule.description }} +

      + + {% assign ruleName = rule.name | split: ' ' %} + {% capture ruleNameCamelcase %}{% for word in ruleName %}{{ word | capitalize }} {% endfor %}{% endcapture %} + +{% highlight yaml %} +- alert: {{ ruleNameCamelcase | remove: ' ' }} + expr: {{ rule.query }} + for: 30m + labels: + severity: warning + annotations: + summary: "{{ rule.name }} (instance {% raw %}{{ $labels.instance }}{% endraw %})" + description: "{{ rule.description }}\n VALUE = {% raw %}{{ $value }}{% endraw %}\n LABELS: {% raw %}{{ $labels }}{% endraw %}" + +{% endhighlight %} + +

      +
      +
      +
    • + {% endfor %} +
    + +
    +
  • + {% endfor %} + {% endfor %} +