From 9bbe04799fd79814c916b04744b96251fda67a03 Mon Sep 17 00:00:00 2001 From: Samuel Berthe Date: Wed, 15 Jun 2022 01:42:18 +0200 Subject: [PATCH] feat: build and publish into dist/rules --- .github/workflows/dist.yml | 59 ++++++++++++++++++++++++++++++ .github/workflows/test.yml | 46 +++++++++++------------ _data/rules.yml | 75 ++++++++++++++++++++++++++++++++++---- dist/template.yml | 16 ++++++++ 4 files changed, 166 insertions(+), 30 deletions(-) create mode 100644 .github/workflows/dist.yml create mode 100644 dist/template.yml diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml new file mode 100644 index 0000000..953d99f --- /dev/null +++ b/.github/workflows/dist.yml @@ -0,0 +1,59 @@ +name: Publish + +on: + push: + branches: + - master + +jobs: + publish: + name: Publish + # Check if the PR is not from a fork + if: github.repository_owner == 'samber' + runs-on: ubuntu-latest + steps: + - name: Checkout Repo + uses: actions/checkout@v2 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 2.6 + + - name: Set up yq + uses: mikefarah/yq@master + + - name: Install liquid + run: gem install liquid-cli + + - name: Build rule configuration + run: | + gem install liquid-cli + cat _data/rules.yml | yq -I 0 -o json > _data/rules.json + + rm -rf dist/rules + + for service in $(cat _data/rules.json | jq -r '.groups[].services[] | @base64'); do + subdir=dist/rules/$(echo ${service} | base64 --decode | jq -r '.name | ascii_downcase | split(" ") | join("-")') + mkdir -p "${subdir}" + + # groupName=$(echo "{% assign groupName = name | split: ' ' %}{% capture groupNameCamelcase %}{% for word in groupName %}{{ word | capitalize }} {% endfor %}{% endcapture %} {{ groupNameCamelcase | remove: ' ' | remove: '-' }}" | liquid $(echo ${service} | base64 --decode | jq -r '.name | ascii_downcase | split(" ") | join("-")')) + + for exporter in $(echo ${service} | base64 --decode | jq -r '.exporters[] | @base64'); do + exporterName=$(echo ${exporter} | base64 --decode | jq -r '.slug') + cat dist/template.yml | liquid "$(echo ${exporter} | base64 --decode)" > ${subdir}/${exporterName}.yml + done + done + + # https://peterevans.dev/posts/github-actions-how-to-automate-code-formatting-in-pull-requests/ + - name: Check for modified files + id: git-check + run: echo ::set-output name=modified::$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) + - name: Push changes + if: steps.git-check.outputs.modified == 'true' + run: | + git config --global user.name 'Samuel Berthe' + git config --global user.email 'samuel-berthe@users.noreply.github.com' + git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} + git commit -am "Publish" + git push diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f57a0b7..2f1b823 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,31 +7,31 @@ jobs: name: Check alert rules syntax runs-on: ubuntu-latest steps: - - name: Checkout Repo - uses: actions/checkout@v2 + - name: Checkout Repo + uses: actions/checkout@v2 - - name: Set up Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: 2.6 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 2.6 - - name: Set up yq - uses: mikefarah/yq@master + - name: Set up yq + uses: mikefarah/yq@master - - name: Install liquid - run: gem install liquid-cli + - name: Install liquid + run: gem install liquid-cli - - name: Build rule configuration - run: | - gem install liquid-cli - cat _data/rules.yml | yq -I 0 -o json > _data/rules.json - cat test/template.yml | liquid "$(< _data/rules.json)" > test/rules.yml + - name: Build rule configuration + run: | + gem install liquid-cli + cat _data/rules.yml | yq -I 0 -o json > _data/rules.json + cat test/template.yml | liquid "$(< _data/rules.json)" > test/rules.yml - - name: Check Prometheus alert rules - uses: peimanja/promtool-github-actions@master - with: - promtool_actions_subcommand: 'rules' - promtool_actions_files: 'test/rules.yml' - promtool_actions_comment: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Check Prometheus alert rules + uses: peimanja/promtool-github-actions@master + with: + promtool_actions_subcommand: 'rules' + promtool_actions_files: 'test/rules.yml' + promtool_actions_comment: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/_data/rules.yml b/_data/rules.yml index d7372ab..a564797 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -12,7 +12,8 @@ groups: services: - name: Prometheus self-monitoring exporters: - - rules: + - slug: embedded-exporter + rules: - name: Prometheus job missing description: A Prometheus job has disappeared query: 'absent(up{job="prometheus"})' @@ -124,6 +125,7 @@ groups: - name: Host and hardware exporters: - name: node-exporter + slug: node-exporter doc_url: https://github.com/prometheus/node_exporter rules: - name: Host out of memory @@ -303,6 +305,7 @@ groups: - name: Docker containers exporters: - name: google/cAdvisor + slug: google-cadvisor doc_url: https://github.com/google/cadvisor rules: - name: Container killed @@ -343,6 +346,7 @@ groups: - name: Blackbox exporters: - name: prometheus/blackbox_exporter + slug: blackbox-exporter doc_url: https://github.com/prometheus/blackbox_exporter rules: - name: Blackbox probe failed @@ -384,6 +388,7 @@ groups: - name: Windows Server exporters: - name: prometheus-community/windows_exporter + slug: windows-exporter doc_url: https://github.com/prometheus-community/windows_exporter rules: - name: Windows Server collector Error @@ -413,6 +418,7 @@ groups: - name: VMware exporters: - name: pryorda/vmware_exporter + slug: pryorda-vmware-exporter doc_url: https://github.com/pryorda/vmware_exporter rules: - name: Virtual Machine Memory Warning @@ -439,6 +445,7 @@ groups: - name: Netdata exporters: - name: Embedded exporter + slug: embedded-exporter doc_url: https://github.com/netdata/netdata/blob/master/backends/prometheus/README.md rules: - name: Netdata high cpu usage @@ -489,6 +496,7 @@ groups: - name: MySQL exporters: - name: prometheus/mysqld_exporter + slug: mysqld-exporter doc_url: https://github.com/prometheus/mysqld_exporter rules: - name: MySQL down @@ -535,6 +543,7 @@ groups: - name: PostgreSQL exporters: - name: prometheus-community/postgres_exporter + slug: postgres-exporter doc_url: https://github.com/prometheus-community/postgres_exporter rules: - name: Postgresql down @@ -642,6 +651,7 @@ groups: - name: SQL Server exporters: - name: Ozarklake/prometheus-mssql-exporter + slug: ozarklake-mssql-exporter doc_url: https://github.com/Ozarklake/prometheus-mssql-exporter rules: - name: SQL Server down @@ -656,6 +666,7 @@ groups: - name: PGBouncer exporters: - name: spreaker/prometheus-pgbouncer-exporter + slug: spreaker-pgbouncer-exporter doc_url: https://github.com/spreaker/prometheus-pgbouncer-exporter rules: - name: PGBouncer active connections @@ -675,6 +686,7 @@ groups: - name: Redis exporters: - name: oliver006/redis_exporter + slug: oliver006-redis-exporter doc_url: https://github.com/oliver006/redis_exporter rules: - name: Redis down @@ -736,6 +748,7 @@ groups: - name: MongoDB exporters: - name: percona/mongodb_exporter + slug: percona-mongodb-exporter doc_url: https://github.com/percona/mongodb_exporter rules: - name: MongoDB Down @@ -772,6 +785,7 @@ groups: for: 2m - name: dcu/mongodb_exporter + slug: dcu-mongodb-exporter doc_url: https://github.com/dcu/mongodb_exporter rules: - name: MongoDB replication lag @@ -820,6 +834,7 @@ groups: for: 2m - name: stefanprodan/mgob + slug: stefanprodan-mgob-exporter doc_url: https://github.com/stefanprodan/mgob rules: - name: Mgob backup failed @@ -830,6 +845,7 @@ groups: - name: RabbitMQ exporters: - name: rabbitmq/rabbitmq-prometheus + slug: rabbitmq-exporter doc_url: https://github.com/rabbitmq/rabbitmq-prometheus rules: - name: Rabbitmq node down @@ -877,6 +893,7 @@ groups: for: 2m - name: kbudde/rabbitmq-exporter + slug: kbudde-rabbitmq-exporter doc_url: https://github.com/kbudde/rabbitmq_exporter rules: - name: Rabbitmq down @@ -944,6 +961,7 @@ groups: - name: Elasticsearch exporters: - name: justwatchcom/elasticsearch_exporter + slug: justwatchcom-elasticsearch-exporter doc_url: https://github.com/justwatchcom/elasticsearch_exporter rules: - name: Elasticsearch Heap Usage Too High @@ -1016,6 +1034,7 @@ groups: - name: Cassandra exporters: - name: instaclustr/cassandra-exporter + slug: instaclustr-cassandra-exporter doc_url: https://github.com/instaclustr/cassandra-exporter rules: - name: 'Cassandra Node is unavailable' @@ -1077,6 +1096,7 @@ groups: severity: critical - name: criteo/cassandra_exporter + slug: criteo-cassandra-exporter doc_url: https://github.com/criteo/cassandra_exporter rules: - name: Cassandra hints count @@ -1165,9 +1185,11 @@ groups: - name: Zookeeper exporters: - name: cloudflare/kafka_zookeeper_exporter + slug: cloudflare-kafka-zookeeper-exporter doc_url: https://github.com/cloudflare/kafka_zookeeper_exporter rules: - name: dabealu/zookeeper-exporter + slug: dabealu-zookeeper-exporter doc_url: https://github.com/dabealu/zookeeper-exporter rules: - name: Zookeeper Down @@ -1191,6 +1213,7 @@ groups: - name: Kafka exporters: - name: danielqsj/kafka_exporter + slug: danielqsj-kafka-exporter doc_url: https://github.com/danielqsj/kafka_exporter rules: - name: Kafka topics replicas @@ -1203,6 +1226,7 @@ groups: severity: critical for: 1m - name: linkedin/Burrow + slug: linkedin-kafka-exporter doc_url: https://github.com/linkedin/Burrow rules: - name: Kafka topic offset decreased @@ -1218,6 +1242,7 @@ groups: - name: Pulsar exporters: - name: embedded exporter + slug: embedded-exporter doc_url: https://pulsar.apache.org/docs/reference-metrics/ rules: - name: Pulsar subscription high number of backlog entries @@ -1274,6 +1299,7 @@ groups: - name: Solr exporters: - name: embedded exporter + slug: embedded-exporter doc_url: https://solr.apache.org/guide/8_11/monitoring-solr-with-prometheus-and-grafana.html rules: - name: Solr update errors @@ -1298,7 +1324,8 @@ groups: services: - name: Nginx exporters: - - name: nginx-lua-prometheus + - name: knyar/nginx-lua-prometheus + slug: knyar-nginx-exporter doc_url: https://github.com/knyar/nginx-lua-prometheus rules: - name: Nginx high HTTP 4xx error rate @@ -1320,6 +1347,7 @@ groups: - name: Apache exporters: - name: Lusitaniae/apache_exporter + slug: lusitaniae-apache-exporter doc_url: https://github.com/Lusitaniae/apache_exporter rules: - name: Apache down @@ -1339,6 +1367,7 @@ groups: - name: HaProxy exporters: - name: Embedded exporter (HAProxy >= v2) + slug: embedded-exporter-v2 doc_url: https://github.com/haproxy/haproxy/tree/master/contrib/prometheus-exporter rules: - name: HAProxy high HTTP 4xx error rate backend @@ -1410,6 +1439,7 @@ groups: severity: warning for: 1m - name: prometheus/haproxy_exporter (HAProxy < v2) + slug: haproxy-exporter-v1 doc_url: https://github.com/prometheus/haproxy_exporter rules: - name: HAProxy down @@ -1492,6 +1522,7 @@ groups: - name: Traefik exporters: - name: Embedded exporter v2 + slug: embedded-exporter-v2 doc_url: https://docs.traefik.io/observability/metrics/prometheus/ rules: - name: Traefik service down @@ -1509,6 +1540,7 @@ groups: severity: critical for: 1m - name: Embedded exporter v1 + slug: embedded-exporter-v1 doc_url: https://docs.traefik.io/observability/metrics/prometheus/ rules: - name: Traefik backend down @@ -1531,6 +1563,7 @@ groups: - name: PHP-FPM exporters: - name: bakins/php-fpm-exporter + slug: bakins-fpm-exporter doc_url: https://github.com/bakins/php-fpm-exporter rules: - name: PHP-FPM max-children reached @@ -1541,6 +1574,7 @@ groups: - name: JVM exporters: - name: java-client + slug: jvm-exporter doc_url: https://github.com/prometheus/client_java rules: - name: JVM memory filling up @@ -1552,6 +1586,7 @@ groups: - name: Sidekiq exporters: - name: Strech/sidekiq-prometheus-exporter + slug: strech-sidekiq-exporter doc_url: https://github.com/Strech/sidekiq-prometheus-exporter rules: - name: Sidekiq queue size @@ -1570,6 +1605,7 @@ groups: - name: Kubernetes exporters: - name: kube-state-metrics + slug: kubestate-exporter doc_url: https://github.com/kubernetes/kube-state-metrics/tree/master/docs rules: - name: Kubernetes Node ready @@ -1732,6 +1768,7 @@ groups: - name: Nomad exporters: - name: Embedded exporter + slug: embedded-exporter rules: - name: Nomad job failed description: Nomad job failed @@ -1754,6 +1791,7 @@ groups: - name: Consul exporters: - name: prometheus/consul_exporter + slug: consul-exporter doc_url: https://github.com/prometheus/consul_exporter rules: - name: Consul service healthcheck failed @@ -1772,7 +1810,9 @@ groups: - name: Etcd exporters: - - rules: + - name: Embedded exporter + slug: embedded-exporter + rules: - name: Etcd insufficient Members description: Etcd cluster should have an odd number of members query: 'count(etcd_server_id) % 2 == 0' @@ -1839,6 +1879,7 @@ groups: - name: Linkerd exporters: - name: Embedded exporter + slug: embedded-exporter doc_url: https://linkerd.io/2/tasks/exporting-metrics/ rules: - name: Linkerd high error rate @@ -1850,6 +1891,7 @@ groups: - name: Istio exporters: - name: Embedded exporter + slug: embedded-exporter doc_url: https://istio.io/latest/docs/tasks/observability/metrics/querying-metrics/ rules: - name: Istio Kubernetes gateway availability drop @@ -1907,6 +1949,7 @@ groups: - name: Ceph exporters: - name: Embedded exporter + slug: embedded-exporter doc_url: https://docs.ceph.com/docs/luminous/mgr/prometheus/ rules: - name: Ceph State @@ -1972,6 +2015,7 @@ groups: - name: SpeedTest exporters: - name: Speedtest exporter + slug: nlamirault-speedtest-exporter doc_url: https://github.com/nlamirault/speedtest_exporter rules: - name: SpeedTest Slow Internet Download @@ -1986,12 +2030,14 @@ groups: - name: ZFS exporters: - name: node-exporter + slug: node-exporter doc_url: https://github.com/prometheus/node_exporter rules: - name: OpenEBS exporters: - name: Embedded exporter + slug: embedded-exporter rules: - name: OpenEBS used pool capacity description: 'OpenEBS Pool use more than 80% of his capacity' @@ -2002,6 +2048,7 @@ groups: - name: Minio exporters: - name: Embedded exporter + slug: embedded-exporter rules: - name: Minio disk offline description: 'Minio disk is offline' @@ -2015,6 +2062,7 @@ groups: - name: SSL/TLS exporters: - name: ssl_exporter + slug: ribbybibby-ssl-exporter doc_url: https://github.com/ribbybibby/ssl_exporter rules: - name: SSL certificate probe failed @@ -2037,6 +2085,7 @@ groups: - name: Juniper exporters: - name: czerwonk/junos_exporter + slug: czerwonk-junos-exporter doc_url: https://github.com/czerwonk/junos_exporter rules: - name: Juniper switch down @@ -2057,6 +2106,7 @@ groups: - name: CoreDNS exporters: - name: Embedded exporter + slug: embedded-exporter rules: - name: CoreDNS Panic Count description: Number of CoreDNS panics encountered @@ -2066,6 +2116,7 @@ groups: - name: Freeswitch exporters: - name: znerol/prometheus-freeswitch-exporter + slug: znerol-freeswitch-exporter doc_url: https://pypi.org/project/prometheus-freeswitch-exporter rules: - name: Freeswitch down @@ -2086,6 +2137,7 @@ groups: - name: Hashicorp Vault exporters: - name: Embedded exporter + slug: embedded-exporter doc_url: https://github.com/hashicorp/vault/blob/master/website/content/docs/configuration/telemetry.mdx#prometheus rules: - name: Vault sealed @@ -2107,7 +2159,9 @@ groups: services: - name: Thanos exporters: - - rules: + - name: Embedded exporter + slug: embedded-exporter + rules: - name: Thanos compaction halted description: Thanos compaction has failed to run and is now halted. query: 'thanos_compact_halted == 1' @@ -2122,7 +2176,9 @@ groups: severity: critical - name: Loki exporters: - - rules: + - name: Embedded exporter + slug: embedded-exporter + rules: - name: Loki process too many restarts description: A loki process had too many restarts (target {{ $labels.instance }}) query: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2 @@ -2144,7 +2200,9 @@ groups: for: 5m - name: Promtail exporters: - - rules: + - name: Embedded exporter + slug: embedded-exporter + rules: - name: Promtail request errors description: The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. query: '100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance) > 10' @@ -2157,7 +2215,9 @@ groups: for: 5m - name: Cortex exporters: - - rules: + - name: Embedded exporter + slug: embedded-exporter + rules: - name: Cortex ruler configuration reload failure description: Cortex ruler configuration reload failure (instance {{ $labels.instance }}) query: cortex_ruler_config_last_reload_successful != 1 @@ -2187,6 +2247,7 @@ groups: - name: Jenkins exporters: - name: Metric plugin + slug: metric-plugin doc_url: https://plugins.jenkins.io/prometheus/ rules: - name: Jenkins offline diff --git a/dist/template.yml b/dist/template.yml new file mode 100644 index 0000000..0dd9684 --- /dev/null +++ b/dist/template.yml @@ -0,0 +1,16 @@ +groups: +{% assign groupName = slug | split: '-' %}{% capture groupNameCamelcase %}{% for word in groupName %}{{ word | capitalize }} {% endfor %}{% endcapture %} +- name: {{ groupNameCamelcase | remove: ' ' | remove: '-' }} + + rules: +{% for rule in rules %}{% assign ruleName = rule.name | split: ' ' %}{% capture ruleNameCamelcase %}{% for word in ruleName %}{{ word | capitalize }} {% endfor %}{% endcapture %} + {% for comment in comments %}# {{ comment | strip }} + {% endfor %}- alert: {{ ruleNameCamelcase | remove: ' ' }} + expr: '{{ rule.query }}' + for: {% if rule.for %}{{ rule.for }}{% else %}0m{% endif %} + labels: + severity: {{ rule.severity }} + annotations: + summary: {{ rule.name }} (instance {% raw %}{{ $labels.instance }}{% endraw %}) + description: "{{ rule.description | replace: '"', '\"' }}\n VALUE = {% raw %}{{ $value }}{% endraw %}\n LABELS = {% raw %}{{ $labels }}{% endraw %}" +{% endfor %} \ No newline at end of file