diff --git a/Gemfile.lock b/Gemfile.lock index b05bd09..fb8413f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -231,7 +231,7 @@ GEM jekyll-seo-tag (~> 2.1) minitest (5.15.0) multipart-post (2.1.1) - nokogiri (1.13.6-x86_64-linux) + nokogiri (1.13.9-x86_64-linux) racc (~> 1.4) octokit (4.22.0) faraday (>= 0.9) @@ -273,6 +273,7 @@ GEM zeitwerk (2.5.4) PLATFORMS + x86_64-linux x86_64-linux-musl DEPENDENCIES diff --git a/README.md b/README.md index c914de9..845765e 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,8 @@ Collection available here: **[https://awesome-prometheus-alerts.grep.to](https:/ - [Juniper](https://awesome-prometheus-alerts.grep.to/rules#juniper) - [CoreDNS](https://awesome-prometheus-alerts.grep.to/rules#coredns) - [FreeSwitch](https://awesome-prometheus-alerts.grep.to/rules#freeswitch) -- [Hashicorp Vault](https://awesome-prometheus-alerts.grep.to/rules#Hashicorp-Vault) +- [Hashicorp Vault](https://awesome-prometheus-alerts.grep.to/rules#hashicorp-vault) +- [Cloudflare](https://awesome-prometheus-alerts.grep.to/rules#cloudflare) #### Other diff --git a/_data/rules.yml b/_data/rules.yml index fe02e9b..7b78b4e 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -186,12 +186,12 @@ groups: for: 2m - name: Host out of inodes description: Disk is almost running out of available inodes (< 10% left) - query: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0' + query: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' severity: warning for: 2m - name: Host inodes will fill in 24 hours description: Filesystem is predicted to run out of inodes within the next 24 hours at current write rate - query: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0' + query: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and predict_linear(node_filesystem_files_free[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' severity: warning for: 2m - name: Host unusual disk read latency @@ -2190,6 +2190,22 @@ groups: severity: warning for: 5m + - name: Cloudflare + exporters: + - name: lablabs/cloudflare-exporter + slug: lablabs-cloudflare-exporter + doc_url: https://github.com/lablabs/cloudflare-exporter + rules: + - name: Cloudflare http 4xx error rate + description: 'Cloudflare high HTTP 4xx error rate (> 5% for domain {{ $labels.zone }})' + query: '(sum by(zone) (rate(cloudflare_zone_requests_status{status=~"^4.."}[15m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[15m]))) * 100 > 5' + severity: warning + - name: Cloudflare http 5xx error rate + description: 'Cloudflare high HTTP 5xx error rate (> 5% for domain {{ $labels.zone }})' + query: '(sum by (zone) (rate(cloudflare_zone_requests_status{status=~"^5.."}[5m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[5m]))) * 100 > 5' + severity: critical + + - name: Other services: - name: Thanos diff --git a/dist/rules/cloudflare/lablabs-cloudflare-exporter.yml b/dist/rules/cloudflare/lablabs-cloudflare-exporter.yml new file mode 100644 index 0000000..bb4c018 --- /dev/null +++ b/dist/rules/cloudflare/lablabs-cloudflare-exporter.yml @@ -0,0 +1,23 @@ +groups: + +- name: LablabsCloudflareExporter + + rules: + + - alert: CloudflareHttp4xxErrorRate + expr: '(sum by(zone) (rate(cloudflare_zone_requests_status{status=~"^4.."}[15m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[15m]))) * 100 > 5' + for: 0m + labels: + severity: warning + annotations: + summary: Cloudflare http 4xx error rate (instance {{ $labels.instance }}) + description: "Cloudflare high HTTP 4xx error rate (> 5% for domain {{ $labels.zone }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: CloudflareHttp5xxErrorRate + expr: '(sum by (zone) (rate(cloudflare_zone_requests_status{status=~"^5.."}[5m])) / on (zone) sum by (zone) (rate(cloudflare_zone_requests_status[5m]))) * 100 > 5' + for: 0m + labels: + severity: critical + annotations: + summary: Cloudflare http 5xx error rate (instance {{ $labels.instance }}) + description: "Cloudflare high HTTP 5xx error rate (> 5% for domain {{ $labels.zone }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/dist/rules/host-and-hardware/node-exporter.yml b/dist/rules/host-and-hardware/node-exporter.yml index 5b666ed..88d62a6 100644 --- a/dist/rules/host-and-hardware/node-exporter.yml +++ b/dist/rules/host-and-hardware/node-exporter.yml @@ -77,7 +77,7 @@ groups: description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: HostOutOfInodes - expr: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0' + expr: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' for: 2m labels: severity: warning @@ -86,7 +86,7 @@ groups: description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: HostInodesWillFillIn24Hours - expr: 'node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint="/rootfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{mountpoint="/rootfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/rootfs"} == 0' + expr: 'node_filesystem_files_free / node_filesystem_files * 100 < 10 and predict_linear(node_filesystem_files_free[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0' for: 2m labels: severity: warning