diff --git a/_data/rules.yml b/_data/rules.yml
index fb325c7..255b5fb 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -730,9 +730,11 @@ groups:
                   See https://github.com/samber/awesome-prometheus-alerts/issues/289#issuecomment-1164842737
               - name: Postgresql invalid index
                 description: "The table {{ $labels.relname }} has an invalid index: {{ $labels.indexrelname }}. You should execute `DROP INDEX {{ $labels.indexrelname }};`"
-                query: 'pg_genaral_index_info_pg_relation_size{indexrelname=~".*ccnew.*"}'
+                query: 'pg_general_index_info_pg_relation_size{indexrelname=~".*ccnew.*"}'
                 severity: warning
                 for: 6h
+                comments: |
+                  See https://github.com/samber/awesome-prometheus-alerts/issues/289#issuecomment-1164842737
 
       - name: SQL Server
         exporters:
@@ -1537,7 +1539,7 @@ groups:
                 for: 3m
               - name: Nats high memory usage
                 description: NATS server memory usage is above 200MB for {{ $labels.instance }}
-                query: "gnatsd_varz_mem > 200 * 1024 * 1024"
+                query: "gnatsd_varz_mem > 200000000"
                 severity: warning
                 for: 5m
               - name: Nats slow consumers
@@ -1547,25 +1549,25 @@ groups:
                 for: 3m
               - name: Nats server down
                 description: NATS server has been down for more than 5 minutes
-                query: "absent(up{job='nats'})"
+                query: "absent(gnatsd_connz_total)"
                 severity: critical
                 for: 5m
-              - name: High CPU usage on NATS server
+              - name: Nats high CPU usage
                 description: NATS server is using more than 80% CPU for the last 5 minutes
                 query: "rate(gnatsd_varz_cpu[5m]) > 0.8"
                 severity: warning
                 for: 5m
-              - name: High number of connections in NATS
+              - name: Nats high number of connections
                 description: NATS server has more than 1000 active connections
                 query: "gnatsd_connz_num_connections > 1000"
                 severity: warning
                 for: 5m
-              - name: High JetStream store usage
+              - name: Nats high JetStream store usage
                 description: JetStream store usage is over 80%
                 query: "gnatsd_varz_jetstream_stats_storage / gnatsd_varz_jetstream_config_max_storage > 0.8"
                 severity: warning
                 for: 5m
-              - name: High JetStream memory usage
+              - name: Nats high JetStream memory usage
                 description: JetStream memory usage is over 80%
                 query: "gnatsd_varz_jetstream_stats_memory / gnatsd_varz_jetstream_config_max_memory > 0.8"
                 severity: warning
@@ -1575,42 +1577,42 @@ groups:
                 query: "gnatsd_connz_subscriptions > 1000"
                 severity: warning
                 for: 5m
-              - name: High pending messages in NATS
+              - name: Nats high pending messages
                 description: NATS server has more than 100,000 pending bytes
                 query: "gnatsd_connz_pending_bytes > 100000"
                 severity: warning
                 for: 5m
-              - name: Errors in NATS
+              - name: Nats too many errors
                 description: NATS server has encountered errors in the last 5 minutes
                 query: "increase(gnatsd_varz_jetstream_stats_api_errors[5m]) > 0"
                 severity: warning
                 for: 5m
-              - name: JetStream consumers exceeded
+              - name: Nats JetStream consumers exceeded
                 description: JetStream has more than 100 active consumers
                 query: "sum(gnatsd_varz_jetstream_stats_accounts) > 100"
                 severity: warning
                 for: 5m
-              - name: Frequent authentication timeouts in NATS
+              - name: Nats frequent authentication timeouts
                 description: There have been more than 5 authentication timeouts in the last 5 minutes
                 query: "increase(gnatsd_varz_auth_timeout[5m]) > 5"
                 severity: warning
                 for: 5m
-              - name: Max payload size exceeded in NATS
-                description: The max payload size allowed by NATS has been exceeded (1MB)
-                query: "max(gnatsd_varz_max_payload) > 1024 * 1024"
+              - name: Nats max payload size exceeded
+                description: The max payload size allowed by NATS has been exceeded
+                query: "max(gnatsd_varz_max_payload) > 1048576"
                 severity: critical
                 for: 5m
-              - name: Leaf node connection issue in NATS
+              - name: Nats leaf node connection issue
                 description: No leaf node connections have been established in the last 5 minutes
                 query: "increase(gnatsd_varz_leafnodes[5m]) == 0"
                 severity: critical
                 for: 5m
-              - name: Max ping operations exceeded in NATS
+              - name: Nats max ping operations exceeded
                 description: The maximum number of ping operations in NATS has exceeded 50
                 query: "gnatsd_varz_ping_max > 50"
                 severity: warning
                 for: 5m
-              - name: Write deadline exceeded in NATS
+              - name: Nats write deadline exceeded
                 description: The write deadline has been exceeded in NATS, indicating potential message delivery issues
                 query: "gnatsd_varz_write_deadline > 10"
                 severity: critical
diff --git a/dist/rules/postgresql/postgres-exporter.yml b/dist/rules/postgresql/postgres-exporter.yml
index 0e1f473..2ab461f 100644
--- a/dist/rules/postgresql/postgres-exporter.yml
+++ b/dist/rules/postgresql/postgres-exporter.yml
@@ -185,7 +185,7 @@ groups:
         description: "The table {{ $labels.relname }} is bloated. You should execute `VACUUM {{ $labels.relname }};`\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: PostgresqlInvalidIndex
-      expr: 'pg_genaral_index_info_pg_relation_size{indexrelname=~".*ccnew.*"}'
+      expr: 'pg_general_index_info_pg_relation_size{indexrelname=~".*ccnew.*"}'
       for: 6h
       labels:
         severity: warning