Jekyll based doc

This commit is contained in:
Samuel Berthe 2018-10-22 00:53:32 +02:00
parent 1c28ca9eb7
commit 0bc4a1633c
11 changed files with 804 additions and 108 deletions

14
CONTRIBUTING.md Normal file
View file

@ -0,0 +1,14 @@
# Contributing
## Adding alerting rule
Rules are here: `_data/rules.yml`.
## Run locally
```
gem install bundler
bundle install
bundle exec jekyll serve
```

2
Gemfile Normal file
View file

@ -0,0 +1,2 @@
source 'https://rubygems.org'
gem 'github-pages', group: :jekyll_plugins

248
Gemfile.lock Normal file
View file

@ -0,0 +1,248 @@
GEM
remote: https://rubygems.org/
specs:
activesupport (4.2.10)
i18n (~> 0.7)
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
addressable (2.5.2)
public_suffix (>= 2.0.2, < 4.0)
coffee-script (2.4.1)
coffee-script-source
execjs
coffee-script-source (1.11.1)
colorator (1.1.0)
commonmarker (0.17.13)
ruby-enum (~> 0.5)
concurrent-ruby (1.0.5)
dnsruby (1.61.2)
addressable (~> 2.5)
em-websocket (0.5.1)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0.6.0)
ethon (0.11.0)
ffi (>= 1.3.0)
eventmachine (1.2.7)
execjs (2.7.0)
faraday (0.15.3)
multipart-post (>= 1.2, < 3)
ffi (1.9.25)
forwardable-extended (2.6.0)
gemoji (3.0.0)
github-pages (192)
activesupport (= 4.2.10)
github-pages-health-check (= 1.8.1)
jekyll (= 3.7.4)
jekyll-avatar (= 0.6.0)
jekyll-coffeescript (= 1.1.1)
jekyll-commonmark-ghpages (= 0.1.5)
jekyll-default-layout (= 0.1.4)
jekyll-feed (= 0.10.0)
jekyll-gist (= 1.5.0)
jekyll-github-metadata (= 2.9.4)
jekyll-mentions (= 1.4.1)
jekyll-optional-front-matter (= 0.3.0)
jekyll-paginate (= 1.1.0)
jekyll-readme-index (= 0.2.0)
jekyll-redirect-from (= 0.14.0)
jekyll-relative-links (= 0.5.3)
jekyll-remote-theme (= 0.3.1)
jekyll-sass-converter (= 1.5.2)
jekyll-seo-tag (= 2.5.0)
jekyll-sitemap (= 1.2.0)
jekyll-swiss (= 0.4.0)
jekyll-theme-architect (= 0.1.1)
jekyll-theme-cayman (= 0.1.1)
jekyll-theme-dinky (= 0.1.1)
jekyll-theme-hacker (= 0.1.1)
jekyll-theme-leap-day (= 0.1.1)
jekyll-theme-merlot (= 0.1.1)
jekyll-theme-midnight (= 0.1.1)
jekyll-theme-minimal (= 0.1.1)
jekyll-theme-modernist (= 0.1.1)
jekyll-theme-primer (= 0.5.3)
jekyll-theme-slate (= 0.1.1)
jekyll-theme-tactile (= 0.1.1)
jekyll-theme-time-machine (= 0.1.1)
jekyll-titles-from-headings (= 0.5.1)
jemoji (= 0.10.1)
kramdown (= 1.17.0)
liquid (= 4.0.0)
listen (= 3.1.5)
mercenary (~> 0.3)
minima (= 2.5.0)
nokogiri (>= 1.8.2, < 2.0)
rouge (= 2.2.1)
terminal-table (~> 1.4)
github-pages-health-check (1.8.1)
addressable (~> 2.3)
dnsruby (~> 1.60)
octokit (~> 4.0)
public_suffix (~> 2.0)
typhoeus (~> 1.3)
html-pipeline (2.8.4)
activesupport (>= 2)
nokogiri (>= 1.4)
http_parser.rb (0.6.0)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
jekyll (3.7.4)
addressable (~> 2.4)
colorator (~> 1.0)
em-websocket (~> 0.5)
i18n (~> 0.7)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 2.0)
kramdown (~> 1.14)
liquid (~> 4.0)
mercenary (~> 0.3.3)
pathutil (~> 0.9)
rouge (>= 1.7, < 4)
safe_yaml (~> 1.0)
jekyll-avatar (0.6.0)
jekyll (~> 3.0)
jekyll-coffeescript (1.1.1)
coffee-script (~> 2.2)
coffee-script-source (~> 1.11.1)
jekyll-commonmark (1.2.0)
commonmarker (~> 0.14)
jekyll (>= 3.0, < 4.0)
jekyll-commonmark-ghpages (0.1.5)
commonmarker (~> 0.17.6)
jekyll-commonmark (~> 1)
rouge (~> 2)
jekyll-default-layout (0.1.4)
jekyll (~> 3.0)
jekyll-feed (0.10.0)
jekyll (~> 3.3)
jekyll-gist (1.5.0)
octokit (~> 4.2)
jekyll-github-metadata (2.9.4)
jekyll (~> 3.1)
octokit (~> 4.0, != 4.4.0)
jekyll-mentions (1.4.1)
html-pipeline (~> 2.3)
jekyll (~> 3.0)
jekyll-optional-front-matter (0.3.0)
jekyll (~> 3.0)
jekyll-paginate (1.1.0)
jekyll-readme-index (0.2.0)
jekyll (~> 3.0)
jekyll-redirect-from (0.14.0)
jekyll (~> 3.3)
jekyll-relative-links (0.5.3)
jekyll (~> 3.3)
jekyll-remote-theme (0.3.1)
jekyll (~> 3.5)
rubyzip (>= 1.2.1, < 3.0)
jekyll-sass-converter (1.5.2)
sass (~> 3.4)
jekyll-seo-tag (2.5.0)
jekyll (~> 3.3)
jekyll-sitemap (1.2.0)
jekyll (~> 3.3)
jekyll-swiss (0.4.0)
jekyll-theme-architect (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-cayman (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-dinky (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-hacker (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-leap-day (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-merlot (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-midnight (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-minimal (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-modernist (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-primer (0.5.3)
jekyll (~> 3.5)
jekyll-github-metadata (~> 2.9)
jekyll-seo-tag (~> 2.0)
jekyll-theme-slate (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-tactile (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-theme-time-machine (0.1.1)
jekyll (~> 3.5)
jekyll-seo-tag (~> 2.0)
jekyll-titles-from-headings (0.5.1)
jekyll (~> 3.3)
jekyll-watch (2.1.2)
listen (~> 3.0)
jemoji (0.10.1)
gemoji (~> 3.0)
html-pipeline (~> 2.2)
jekyll (~> 3.0)
kramdown (1.17.0)
liquid (4.0.0)
listen (3.1.5)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
ruby_dep (~> 1.2)
mercenary (0.3.6)
mini_portile2 (2.3.0)
minima (2.5.0)
jekyll (~> 3.5)
jekyll-feed (~> 0.9)
jekyll-seo-tag (~> 2.1)
minitest (5.11.3)
multipart-post (2.0.0)
nokogiri (1.8.5)
mini_portile2 (~> 2.3.0)
octokit (4.13.0)
sawyer (~> 0.8.0, >= 0.5.3)
pathutil (0.16.1)
forwardable-extended (~> 2.6)
public_suffix (2.0.5)
rb-fsevent (0.10.3)
rb-inotify (0.9.10)
ffi (>= 0.5.0, < 2)
rouge (2.2.1)
ruby-enum (0.7.2)
i18n
ruby_dep (1.5.0)
rubyzip (1.2.2)
safe_yaml (1.0.4)
sass (3.6.0)
sass-listen (~> 4.0.0)
sass-listen (4.0.0)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
sawyer (0.8.1)
addressable (>= 2.3.5, < 2.6)
faraday (~> 0.8, < 1.0)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)
typhoeus (1.3.0)
ethon (>= 0.9.0)
tzinfo (1.2.5)
thread_safe (~> 0.1)
unicode-display_width (1.4.0)
PLATFORMS
ruby
DEPENDENCIES
github-pages
BUNDLED WITH
1.16.6

110
README.md
View file

@ -2,112 +2,8 @@
(WIP)
[https://awesome-prometheus-alerts.grep.to](https://awesome-prometheus-alerts.grep.to)
## Todo
- Write full alert rules in yml files
- Make a small website with form for each rule, to build custom alerts (criticity, thresolds, instance...)
## Queries
### Prometheus internal
- `up == 0` // killed exporters
### node-exporter
Memory:
- `(node_memory_MemFree{} + node_memory_Cached{} + node_memory_Buffers{}) / node_memory_MemTotal{} * 100 < 5`
Network:
- `sum by (instance) (irate(node_network_transmit_bytes{}[2m])) / 1024 / 1024 > 100`
- `sum by (instance) (irate(node_network_receive_bytes{}[2m])) / 1024 / 1024 > 100`
Disk:
- `sum by (instance) (irate(node_disk_bytes_read{}[2m])) / 1024 / 1024 > 50`
- `sum by (instance) (irate(node_disk_bytes_written{}[2m])) / 1024 / 1024 > 50`
- `node_filesystem_free{mountpoint ="/rootfs"} / node_filesystem_size{mountpoint ="/rootfs"} * 100 < 10` // gb
- `node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100` // inodes
- `rate(node_disk_read_time_ms{}[1m]) / rate(node_disk_reads_completed{}[1m]) > 100` // too much latency
- `rate(node_disk_write_time_ms{}[1m]) / rate(node_disk_writes_completed{}[1m]) > 100` // too much latency
CPU:
- `avg by (instance) (sum by (cpu) (rate(node_cpu{mode!="idle"}[2m]))) * 100 > 75` // load
- `rate(node_context_switches{}[5m]) > 1000` // nbr context switch per second
### cAdvisor
- `time() - container_last_seen{} > 60` // get killed container
### Nginx
- `rate(nginx_http_requests_total{status=~"^4.."}[1m]) > 10` // get 4xx http requests
- `rate(nginx_http_requests_total{status=~"^5.."}[1m]) > 10` // get 5xx http requests
### Rabbitmq (kbudde/rabbitmq-exporter)
- `rabbitmq_up{} == 0`
- `rabbitmq_running{} >= 2` // cluster
- `rabbitmq_partitions{} > 0` // cluster got partition :-(
- `rabbitmq_node_mem_used{} / rabbitmq_node_mem_limit{} * 100 > 90` // too much ram used
- `rabbitmq_connectionsTotal{} > 1000`
- `rabbitmq_queue_messages_unacknowledged{queue="my-queue"} > 5`
- `rabbitmq_queue_messages_ready{queue="my-queue"} > 1000` // more consumers needed
- `time() - rabbitmq_queue_head_message_timestamp{queue="my-queue"} > 60` // takes more than 1min to consume messages
- `rabbitmq_queue_consumers{} == 0` // no consumer on queue
- `rate(rabbitmq_exchange_messages_published_in_total{exchange="my-exchange"}[1m]) < 5` // no activity on exchange
### PostgreSQL (wrouesnel/postgres_exporter)
- `pg_up{} == 0`
- `pg_replication_lag{} > 10` // more than 10s lag between master and slave
- `time() - pg_stat_user_tables_last_autovacuum{} > 60 * 60 * 24` // did not vaccum for 1 day
- `time() - pg_stat_user_tables_last_autoanalyze{} > 60 * 60 * 24` // did not analyse for 1 day
- `sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) > 100` // too many connections
- `sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) < 5` // connections number too small
- `rate(pg_stat_database_deadlocks{pg_stat_database_de}[1m]) > 0`
### Redis (oliver006/redis_exporter)
- `redis_up{} == 0`
- `time() - redis_rdb_last_save_timestamp_seconds{} > 60 * 60 * 24` // did not backup for 1 day
- `redis_memory_used_bytes{} / redis_total_system_memory_bytes{} * 100 > 90`
- `redis_connected_slaves{}`
- `delta(redis_connected_slaves{}[1m]) < 0` // slaved killed
- `redis_connected_clients{} > 100` // too many connections
- `redis_connected_clients{} < 5` // connections number too small
- `increase(redis_rejected_connections_total{}[1m]) > 0` // rejected connections
### MySQL
### Elasticsearch
### MongoDB
### Apache
### HaProxy
### Traefik
### PHP-FPM
### Kubernetes
### Nomad
### Consul
### Etcd
### Zookeeper
### Linkerd
### Istio
### Blackbox
- In Jekyll, create an alert rule builder to generate custom alerts (severity, thresholds, instance...)

View file

@ -1 +1,4 @@
theme: jekyll-theme-cayman
theme: jekyll-theme-cayman
title: Awesome Prometheus alerts
description: Collection of alert rules

286
_data/rules.yml Normal file
View file

@ -0,0 +1,286 @@
services:
- name: Prometheus
exporters:
- rules:
- name: Exporter down
description: Prometheus exporter down
query: 'up{} == 0'
severity: warning
- name: Host
exporters:
- name: node-exporter
rules:
- name: Out of memory
description: Node memory is filling up (< 10% left)
query: '(node_memory_MemFree{} + node_memory_Cached{} + node_memory_Buffers{}) / node_memory_MemTotal{} * 100 < 10'
severity: warning
- name: Unusual network throughput in
description: Host network interfaces are probably receiving too much data (> 100 MB/s)
query: 'sum by (instance) (irate(node_network_receive_bytes{}[2m])) / 1024 / 1024 > 100'
severity: warning
- name: Unusual network throughput out
description: Host network interfaces are probably sending too much data (> 100 MB/s)
query: 'sum by (instance) (irate(node_network_transmit_bytes{}[2m])) / 1024 / 1024 > 100'
severity: warning
- name: Unusual disk read rate
description: Disk is probably reading too much data (> 50 MB/s)
query: 'sum by (instance) (irate(node_disk_bytes_read{}[2m])) / 1024 / 1024 > 50'
severity: warning
- name: Unusual disk write rate
description: Disk is probably writing too much data (> 50 MB/s)
query: 'sum by (instance) (irate(node_disk_bytes_written{}[2m])) / 1024 / 1024 > 50'
severity: warning
- name: Out of disk space
description: Disk is almost full (< 10% left)
query: 'node_filesystem_free{mountpoint ="/rootfs"} / node_filesystem_size{mountpoint ="/rootfs"} * 100 < 10'
severity: warning
- name: Out of inodes
description: Disk is almost running out of available inodes (< 10% left)
query: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100 < 10'
severity: warning
- name: Unusual disk read latency
description: Disk latency is growing (read operations > 100ms)
query: 'rate(node_disk_read_time_ms{}[1m]) / rate(node_disk_reads_completed{}[1m]) > 100'
severity: warning
- name: Unusual disk write latency
description: Disk latency is growing (write operations > 100ms)
query: 'rate(node_disk_write_time_ms{}[1m]) / rate(node_disk_writes_completed{}[1m]) > 100'
severity: warning
- name: CPU load
description: CPU load (5m) is high (> 75%)
query: 'avg by (instance) (sum by (cpu) (rate(node_cpu{mode!="idle"}[5m]))) * 100 > 75'
severity: warning
- name: Context switching
description: Context switching is growing on node (> 1000 / s)
query: 'rate(node_context_switches{}[5m]) > 1000'
severity: warning
- name: Docker containers
exporters:
- name: cAdvisor
doc_url: https://github.com/google/cadvisor
rules:
- name: Container killed
description: A container has disappeared
query: 'time() - container_last_seen{} > 60'
severity: warning
- name: Nginx
exporters:
- name: nginx-lua-prometheus
doc_url: https://github.com/knyar/nginx-lua-prometheus
rules:
- name: HTTP errors 4xx
description: Too many HTTP requests with status 4xx (> 5%)
query: 'sum(rate(nginx_http_requests_total{status=~"^4.."}[1m])) / sum(rate(nginx_http_requests_total{}[1m])) * 100 > 5'
severity: error
- name: HTTP errors 5xx
description: Too many HTTP requests with status 5xx (> 5%)
query: 'sum(rate(nginx_http_requests_total{status=~"^5.."}[1m])) / sum(rate(nginx_http_requests_total{}[1m])) * 100 > 5'
severity: error
- name: RabbitMQ
exporters:
- name: kbudde/rabbitmq-exporter
doc_url: https://github.com/kbudde/rabbitmq_exporter
rules:
- name: Rabbitmq down
description: RabbitMQ node down
query: 'rabbitmq_up{} == 0'
severity: error
- name: Cluster down
description: Less than 3 nodes running in RabbitMQ cluster
query: 'rabbitmq_running{} < 3'
severity: error
- name: Cluster partition
description: Cluster partition
query: 'rabbitmq_partitions{} > 0'
severity: error
- name: Out of memory
description: Memory available for RabbitMQ is low (< 10%)
query: 'rabbitmq_node_mem_used{} / rabbitmq_node_mem_limit{} * 100 > 90'
severity: warning
- name: Too many connections
description: RabbitMQ instance has too many connections (> 1000)
query: 'rabbitmq_connectionsTotal{} > 1000'
severity: warning
- name: Dead letter queue filling up
description: Dead letter queue is filling up (> 10 msgs)
query: 'rabbitmq_queue_messages{queue="my-dead-letter-queue"} > 10'
severity: error
- name: Too many messages in queue
description: Queue is filling up (> 1000 msgs)
query: 'rabbitmq_queue_messages_ready{queue="my-queue"} > 1000'
severity: warning
- name: Slow queue consuming
description: Queue messages are consumed slowly (> 60s)
query: 'time() - rabbitmq_queue_head_message_timestamp{queue="my-queue"} > 60'
severity: warning
- name: No consumer
description: Queue has no consumer
query: 'rabbitmq_queue_consumers{} == 0'
severity: error
- name: Too many consumers
description: Queue should have only 1 consumer
query: 'rabbitmq_queue_consumers{} > 1'
severity: error
- name: Inactive exchange
description: Exchange receives fewer than 5 msgs per second
query: 'rate(rabbitmq_exchange_messages_published_in_total{exchange="my-exchange"}[1m]) < 5'
severity: warning
- name: MySQL
exporters:
- name: prometheus/mysqld_exporter
doc_url: https://github.com/prometheus/mysqld_exporter
rules:
- name: PostgreSQL
exporters:
- name: wrouesnel/postgres_exporter
doc_url: https://github.com/wrouesnel/postgres_exporter/
rules:
- name: PostgreSQL down
description: PostgreSQL instance is down
query: 'pg_up{} == 0'
severity: error
- name: Replication lag
description: PostgreSQL replication lag is going up (> 10s)
query: 'pg_replication_lag{} > 10'
severity: warning
- name: Table not vacuumed
description: Table has not been vacuumed for 24 hours
query: 'time() - pg_stat_user_tables_last_autovacuum{} > 60 * 60 * 24'
severity: warning
- name: Table not analyzed
description: Table has not been analyzed for 24 hours
query: 'time() - pg_stat_user_tables_last_autoanalyze{} > 60 * 60 * 24'
severity: warning
- name: Too many connections
description: PostgreSQL instance has too many connections
query: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) > 100'
severity: warning
- name: Not enough connections
description: PostgreSQL instance should have more connections (> 5)
query: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) < 5'
severity: warning
- name: Dead locks
description: PostgreSQL has dead-locks
query: 'rate(pg_stat_database_deadlocks{}[1m]) > 0'
severity: warning
- name: Redis
exporters:
- name: oliver006/redis_exporter
doc_url: https://github.com/oliver006/redis_exporter
rules:
- name: Redis down
description: Redis instance is down
query: 'redis_up{} == 0'
severity: error
- name: Missing backup
description: Redis has not been backed up for 24 hours
query: 'time() - redis_rdb_last_save_timestamp_seconds{} > 60 * 60 * 24'
severity: error
- name: Out of memory
description: Redis is running out of memory (> 90%)
query: 'redis_memory_used_bytes{} / redis_total_system_memory_bytes{} * 100 > 90'
severity: warning
- name: Replication broken
description: Redis instance lost a slave
query: 'delta(redis_connected_slaves{}[1m]) < 0'
severity: error
- name: Too many connections
description: Redis instance has too many connections
query: 'redis_connected_clients{} > 100'
severity: warning
- name: Not enough connections
description: Redis instance should have more connections (> 5)
query: 'redis_connected_clients{} < 5'
severity: warning
- name: Rejected connections
description: Some connections to Redis have been rejected
query: 'increase(redis_rejected_connections_total{}[1m]) > 0'
severity: error
- name: MongoDB
exporters:
- name: dcu/mongodb_exporter
doc_url: https://github.com/dcu/mongodb_exporter
rules:
- name: Elasticsearch
exporters:
- name: justwatchcom/elasticsearch_exporter
doc_url: https://github.com/justwatchcom/elasticsearch_exporter
rules:
- name: Apache
exporters:
- name: Lusitaniae/apache_exporter
doc_url: https://github.com/Lusitaniae/apache_exporter
rules:
- name: HaProxy
exporters:
- name: prometheus/haproxy_exporter
doc_url: https://github.com/prometheus/haproxy_exporter
rules:
- name: Traefik
exporters:
- rules:
- name: PHP-FPM
exporters:
- name: bakins/php-fpm-exporter
doc_url: https://github.com/bakins/php-fpm-exporter
rules:
- name: Kubernetes
exporters:
- rules:
- name: Nomad
exporters:
- name: samber/prometheus-nomad-exporter
doc_url: https://github.com/samber/prometheus-nomad-exporter
rules:
- name: Consul
exporters:
- name: prometheus/consul_exporter
doc_url: https://github.com/prometheus/consul_exporter
rules:
- name: Etcd
exporters:
- rules:
- name: Zookeeper
exporters:
- name: cloudflare/kafka_zookeeper_exporter
doc_url: https://github.com/cloudflare/kafka_zookeeper_exporter
rules:
- name: Kafka
exporters:
- name: danielqsj/kafka_exporter
doc_url: https://github.com/danielqsj/kafka_exporter
rules:
- name: Linkerd
exporters:
- rules:
- name: Istio
exporters:
- rules:
- name: Blackbox
exporters:
- name: prometheus/blackbox_exporter
doc_url: https://github.com/prometheus/blackbox_exporter
rules:

54
_layouts/default.html Normal file
View file

@ -0,0 +1,54 @@
<!DOCTYPE html>
<html lang="{{ site.lang | default: "en-US" }}">
<head>
{% if site.google_analytics %}
<script async src="https://www.googletagmanager.com/gtag/js?id={{ site.google_analytics }}"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', '{{ site.google_analytics }}');
</script>
{% endif %}
<meta charset="UTF-8">
{% seo %}
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#157878">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
</head>
<body>
<a id="skip-to-content" href="#content">Skip to the content.</a>
<header class="page-header" role="banner">
<h1 class="project-name">
<a href="/" style="color: white">
{{ site.title | default: site.github.repository_name }}
</a>
</h1>
<h2 class="project-tagline">{{ site.description | default: site.github.project_tagline }}</h2>
<a href="/alertmanager" class="btn">AlertManager config</a>
<a href="/rules" class="btn">Rules</a>
{% if site.github.is_project_page %}
<a href="{{ site.github.repository_url }}" class="btn">View on GitHub</a>
{% endif %}
{% if site.show_downloads %}
<a href="{{ site.github.zip_url }}" class="btn">Download .zip</a>
<a href="{{ site.github.tar_url }}" class="btn">Download .tar.gz</a>
{% endif %}
</header>
<main id="content" class="main-content" role="main">
{{ content }}
<footer class="site-footer">
{% if site.github.is_project_page %}
<span class="site-footer-owner"><a href="{{ site.github.repository_url }}">{{ site.title }}</a> is maintained by <a href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a>.</span>
{% endif %}
<span class="site-footer-credits">This page was generated by <a href="https://pages.github.com">GitHub Pages</a>.</span>
</footer>
</main>
</body>
</html>

56
alertmanager.md Normal file
View file

@ -0,0 +1,56 @@
<h2>
AlertManager configuration
</h2>
{% highlight yaml %}
# alertmanager.yml
route:
# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
# This way ensures that you get multiple alerts for the same group that start
# firing shortly after another are batched together on the first
# notification.
group_wait: 10s
# When the first notification was sent, wait 'group_interval' to send a batch
# of new alerts that started firing for that group.
group_interval: 5m
# If an alert has successfully been sent, wait 'repeat_interval' to
# resend them.
repeat_interval: 30m
# A default receiver
receiver: "slack"
# All the above attributes are inherited by all child routes and can be
# overwritten on each.
routes:
- receiver: "slack"
group_wait: 10s
match_re:
severity: error|warning
continue: true
- receiver: "sms"
group_wait: 10s
match_re:
severity: error
continue: true
receivers:
- name: "slack"
slack_configs:
- api_url: 'https://hooks.slack.com/services/XXXXXXXXX/XXXXXXXXX/xxxxxxxxxxxxxxxxxxxxxxxxxxx'
send_resolved: true
channel: 'monitoring'
text: "{{ range .Alerts }}<!channel> {{ .Annotations.summary }}\n{{ .Annotations.description }}\n{{ end }}"
- name: "sms"
webhook_configs:
- url: http://a.b.c:8080/send/sms
send_resolved: true
{% endhighlight %}

BIN
assets/prometheus-logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

32
index.md Normal file
View file

@ -0,0 +1,32 @@
<style>
.center-image
{
margin: 0 auto;
display: block;
}
</style>
![Prometheus logo](assets/prometheus-logo.png){: .center-image }
<h2>
AlertManager configuration
</h2>
<a href="/alertmanager">
See here
</a>
<h2>
Prometheus alerting rules
</h2>
<ul>
{% for service in site.data.rules.services %}
<li>
<a href="/rules#{{ service.name | replace: " ", "-" | downcase }}">
{{ service.name }}
</a>
</li>
{% endfor %}
</ul>

105
rules.md Normal file
View file

@ -0,0 +1,105 @@
<style>
ul {
list-style: none;
}
</style>
{% highlight yaml %}
# prometheus.yml
global:
scrape_interval: 15s
...
rule_files:
- 'alerts/*.yml'
scrape_configs:
...
{% endhighlight %}
{% highlight yaml %}
# alerts/example-redis.yml
groups:
- name: ExampleRedisGroup
rules:
- alert: ExampleRedisDown
expr: redis_up{} == 0
for: 2m
labels:
severity: error
annotations:
summary: "Redis instance down"
description: "Whatever"
{% endhighlight %}
<ul>
{% for service in site.data.rules.services %}
{% assign serviceIndex = forloop.index %}
{% for exporter in service.exporters %}
<li>
<h2 id="{{ service.name | replace: " ", "-" | downcase }}">
{{ serviceIndex }}.
{{ service.name }}
{% if exporter.name %}
:
{% if exporter.doc_url %}
<a href="{{ exporter.doc_url }}">
{{ exporter.name }}
</a>
{% else %}
{{ exporter.name }}
{% endif %}
{% endif %}
</h2>
{% assign nbrRules = exporter.rules | size %}
{% if nbrRules == 0 %}
{% highlight javascript %}
// @TODO
{% endhighlight %}
{% endif %}
<ul>
{% for rule in exporter.rules %}
{% assign ruleIndex = forloop.index %}
<li>
<h4>
{{ serviceIndex }}.{{ ruleIndex }}.
{{ rule.name }}
</h4>
<details {% if true or serviceIndex == 1 and ruleIndex == 1 %} open {% endif %}>
<summary>{{ rule.description }}</summary>
<p>
{% assign ruleName = rule.name | split: ' ' %}
{% capture ruleNameCamelcase %}{% for word in ruleName %}{{ word | capitalize }} {% endfor %}{% endcapture %}
{% highlight yaml %}
- alert: {{ ruleNameCamelcase | remove: ' ' }}
expr: {{ rule.query }}
for: 30m
labels:
severity: {{ rule.severity }}
annotations:
summary: "{{ rule.name }} (instance {% raw %}{{ $labels.instance }}{% endraw %})"
description: "{{ rule.description }}\n VALUE = {% raw %}{{ $value }}{% endraw %}\n LABELS: {% raw %}{{ $labels }}{% endraw %}"
{% endhighlight %}
</p>
</details>
<br/>
</li>
{% endfor %}
</ul>
<hr/>
</li>
{% endfor %}
{% endfor %}
</ul>