mirror of
https://github.com/samber/awesome-prometheus-alerts.git
synced 2026-06-22 01:17:19 +08:00
chore(ci): adding test with promtool (#281)
This commit is contained in:
parent
4d161ee0a5
commit
8941f71c6c
5 changed files with 76 additions and 6 deletions
35
.github/workflows/test.yml
vendored
Normal file
35
.github/workflows/test.yml
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# CI workflow: converts _data/rules.yml to JSON, renders test/template.yml
# against it into test/rules.yml, and checks the result with promtool.
# Runs on every push and pull request.
name: Promtool check

on: [pull_request, push]

jobs:
  promtool-check:
    name: Check alert rules syntax
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v2

      # NOTE(review): actions/setup-ruby looks to be archived upstream;
      # consider ruby/setup-ruby (needs an explicit ruby-version) - confirm.
      - name: Set up Ruby
        uses: actions/setup-ruby@v1

      - name: Set up yq
        uses: mikefarah/yq@master

      # Installed once here; the build step below invokes `liquid`.
      - name: Install liquid
        run: gem install liquid-cli

      - name: Build rule configuration
        run: |
          cat _data/rules.yml | yq -I 0 -o json > _data/rules.json
          cat test/template.yml | liquid "$(< _data/rules.json)" > test/rules.yml

      - name: Check Prometheus alert rules
        uses: peimanja/promtool-github-actions@master
        with:
          promtool_actions_subcommand: 'rules'
          promtool_actions_files: 'test/rules.yml'
          promtool_actions_comment: true
        env:
          # Passed to the promtool action, which is configured above to comment.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -2,3 +2,5 @@ _site/
|
|||
.sass-cache/
|
||||
.jekyll-cache/
|
||||
.jekyll-metadata
|
||||
_data/rules.json
|
||||
test/rules.yml
|
||||
|
|
|
|||
|
|
@ -1212,9 +1212,7 @@ groups:
|
|||
severity: warning
|
||||
- name: Kafka consumer lag
|
||||
description: Kafka consumer has a 30 minutes and increasing lag
|
||||
query: |
|
||||
'kafka_burrow_topic_partition_offset - on(partition, cluster, topic) group_right() kafka_burrow_partition_current_offset >= (kafka_burrow_topic_partition_offset offset 15m - on(partition, cluster, topic) group_right() kafka_burrow_partition_current_offset offset 15m)
|
||||
AND kafka_burrow_topic_partition_offset - on(partition, cluster, topic) group_right() kafka_burrow_partition_current_offset > 0'
|
||||
query: 'kafka_burrow_topic_partition_offset - on(partition, cluster, topic) group_right() kafka_burrow_partition_current_offset >= (kafka_burrow_topic_partition_offset offset 15m - on(partition, cluster, topic) group_right() kafka_burrow_partition_current_offset offset 15m) AND kafka_burrow_topic_partition_offset - on(partition, cluster, topic) group_right() kafka_burrow_partition_current_offset > 0'
|
||||
severity: warning
|
||||
for: 15m
|
||||
|
||||
|
|
@ -1414,7 +1412,7 @@ groups:
|
|||
for: 1m
|
||||
- name: HAProxy retry high
|
||||
description: High rate of retry on {{ $labels.fqdn }}/{{ $labels.backend }} backend
|
||||
query: 'rate(sum by (backend) (haproxy_backend_retry_warnings_total)) > 10'
|
||||
query: 'sum by (backend) (rate(haproxy_backend_retry_warnings_total[1m])) > 10'
|
||||
severity: warning
|
||||
for: 2m
|
||||
- name: HAProxy backend down
|
||||
|
|
@ -1427,12 +1425,12 @@ groups:
|
|||
severity: critical
|
||||
- name: HAProxy frontend security blocked requests
|
||||
description: HAProxy is blocking requests for security reason
|
||||
query: 'rate(sum by (frontend) (haproxy_frontend_requests_denied_total)) > 10'
|
||||
query: 'sum by (frontend) (rate(haproxy_frontend_requests_denied_total[2m])) > 10'
|
||||
severity: warning
|
||||
for: 2m
|
||||
- name: HAProxy server healthcheck failure
|
||||
description: Some server healthcheck are failing on {{ $labels.server }}
|
||||
query: 'increase(haproxy_server_check_failures_total) > 0'
|
||||
query: 'increase(haproxy_server_check_failures_total[1m]) > 0'
|
||||
severity: warning
|
||||
for: 1m
|
||||
|
||||
|
|
|
|||
17
test/README.md
Normal file
17
test/README.md
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
Since the same rule can appear under several exporters, each alert name in the generated test file gets a prefix to keep it unique.
|
||||
|
||||
Error reported by promtool when rule names are duplicated:
|
||||
|
||||
```
|
||||
$ promtool check rules test/rules.yml
|
||||
Checking rules.yml
|
||||
29 duplicate rule(s) found.
|
||||
Metric: CassandraClientRequestReadFailure
|
||||
Label(s):
|
||||
severity: critical
|
||||
|
||||
[...]
|
||||
|
||||
Might cause inconsistency while recording expressions.
|
||||
```
|
||||
18
test/template.yml
Normal file
18
test/template.yml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Liquid template: emits every rule of every group/service/exporter as one
# alert inside a single "AllRules" group. Alert names are prefixed with the
# group, service and exporter loop indexes so that they stay unique.
# Quotes inside the query, name and description are escaped so the rendered
# file remains valid YAML.
groups:

- name: AllRules
  rules:
{% for group in groups %}{% assign groupIndex = forloop.index %}{% for service in group.services %}{% assign serviceIndex = forloop.index %}{% for exporter in service.exporters %}{% assign exporterIndex = forloop.index %}{% for rule in exporter.rules %}
{% assign ruleName = rule.name | split: ' ' %}{% capture ruleNameCamelcase %}{% for word in ruleName %}{{ word | capitalize }} {% endfor %}{% endcapture %}

    {% for comment in comments %}# {{ comment | strip }}
    {% endfor %}- alert: {{ groupIndex }}_{{ serviceIndex }}_{{ exporterIndex }}_{{ ruleNameCamelcase | remove: ' ' }}
      expr: '{{ rule.query | replace: "'", "''" }}'
      for: {% if rule.for %}{{ rule.for }}{% else %}0m{% endif %}
      labels:
        severity: {{ rule.severity }}
      annotations:
        summary: "{{ rule.name | replace: '"', '\"' }} (instance {% raw %}{{ $labels.instance }}{% endraw %})"
        description: "{{ rule.description | replace: '"', '\"' }}\n  VALUE = {% raw %}{{ $value }}{% endraw %}\n  LABELS = {% raw %}{{ $labels }}{% endraw %}"

{% endfor %}{% endfor %}{% endfor %}{% endfor %}
|
||||
Loading…
Reference in a new issue