Merge branch 'master' of github.com:samber/awesome-prometheus-alerts

This commit is contained in:
Samuel Berthe 2022-10-24 16:55:36 +02:00
commit cdf4551ab7
No known key found for this signature in database
GPG key ID: 64863511FFBD0E3C
5 changed files with 47 additions and 6 deletions

View file

@ -231,7 +231,7 @@ GEM
jekyll-seo-tag (~> 2.1)
minitest (5.15.0)
multipart-post (2.1.1)
nokogiri (1.13.6-x86_64-linux)
nokogiri (1.13.9-x86_64-linux)
racc (~> 1.4)
octokit (4.22.0)
faraday (>= 0.9)
@ -273,6 +273,7 @@ GEM
zeitwerk (2.5.4)
PLATFORMS
x86_64-linux
x86_64-linux-musl
DEPENDENCIES

View file

@ -71,7 +71,8 @@ Collection available here: **[https://awesome-prometheus-alerts.grep.to](https:/
- [Juniper](https://awesome-prometheus-alerts.grep.to/rules#juniper)
- [CoreDNS](https://awesome-prometheus-alerts.grep.to/rules#coredns)
- [FreeSwitch](https://awesome-prometheus-alerts.grep.to/rules#freeswitch)
- [Hashicorp Vault](https://awesome-prometheus-alerts.grep.to/rules#Hashicorp-Vault)
- [Hashicorp Vault](https://awesome-prometheus-alerts.grep.to/rules#hashicorp-vault)
- [Cloudflare](https://awesome-prometheus-alerts.grep.to/rules#cloudflare)
#### Other

View file

@ -186,12 +186,12 @@ groups:
for: 2m
- name: Host out of inodes
description: Disk is almost running out of available inodes (< 10% left)
query: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0'
query: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
severity: warning
for: 2m
- name: Host inodes will fill in 24 hours
description: Filesystem is predicted to run out of inodes within the next 24 hours at current write rate
query: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0'
query: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and predict_linear(node_filesystem_files_free[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
severity: warning
for: 2m
- name: Host unusual disk read latency
@ -2190,6 +2190,22 @@ groups:
severity: warning
for: 5m
# Service entry: Cloudflare alert rules built on metrics from lablabs/cloudflare-exporter.
- name: Cloudflare
exporters:
- name: lablabs/cloudflare-exporter
slug: lablabs-cloudflare-exporter
doc_url: https://github.com/lablabs/cloudflare-exporter
rules:
# Matches when 4xx responses exceed 5% of a zone's request rate (15m rate window).
# Both sides of the ratio are aggregated with `sum by (zone)`, so results carry only the `zone` label.
- name: Cloudflare http 4xx error rate
description: 'Cloudflare high HTTP 4xx error rate (> 5% for domain {{ $labels.zone }})'
query: '(sum by(zone) (rate(cloudflare_zone_requests_status{status=~"^4.."}[15m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[15m]))) * 100 > 5'
severity: warning
# Matches when 5xx responses exceed 5% of a zone's request rate (5m rate window).
# NOTE(review): this rule uses a 5m window while the 4xx rule uses 15m — confirm the difference is intentional.
- name: Cloudflare http 5xx error rate
description: 'Cloudflare high HTTP 5xx error rate (> 5% for domain {{ $labels.zone }})'
query: '(sum by (zone) (rate(cloudflare_zone_requests_status{status=~"^5.."}[5m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[5m]))) * 100 > 5'
severity: critical
- name: Other
services:
- name: Thanos

View file

@ -0,0 +1,23 @@
# Prometheus alerting rules for Cloudflare, based on the
# `cloudflare_zone_requests_status` metric of lablabs/cloudflare-exporter.
#
# Both expressions aggregate with `sum by (zone)`, so the resulting alert
# series carry only the `zone` label. The summaries therefore reference
# `$labels.zone`; `$labels.instance` would always render empty here.
groups:
  - name: LablabsCloudflareExporter
    rules:
      # Fires when 4xx responses exceed 5% of a zone's request rate,
      # measured over a 15m rate window.
      - alert: CloudflareHttp4xxErrorRate
        expr: '(sum by(zone) (rate(cloudflare_zone_requests_status{status=~"^4.."}[15m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[15m]))) * 100 > 5'
        # 0m: the alert fires on the first evaluation where expr matches.
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Cloudflare http 4xx error rate (zone {{ $labels.zone }})'
          description: "Cloudflare high HTTP 4xx error rate (> 5% for domain {{ $labels.zone }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Fires when 5xx responses exceed 5% of a zone's request rate,
      # measured over a 5m rate window.
      - alert: CloudflareHttp5xxErrorRate
        expr: '(sum by (zone) (rate(cloudflare_zone_requests_status{status=~"^5.."}[5m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[5m]))) * 100 > 5'
        # 0m: the alert fires on the first evaluation where expr matches.
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Cloudflare http 5xx error rate (zone {{ $labels.zone }})'
          description: "Cloudflare high HTTP 5xx error rate (> 5% for domain {{ $labels.zone }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View file

@ -77,7 +77,7 @@ groups:
description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostOutOfInodes
expr: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0'
expr: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
for: 2m
labels:
severity: warning
@ -86,7 +86,7 @@ groups:
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostInodesWillFillIn24Hours
expr: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0'
expr: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and predict_linear(node_filesystem_files_free[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
for: 2m
labels:
severity: warning