diff --git a/_data/rules.yml b/_data/rules.yml index 255b5fb..6f5d04d 100644 --- a/_data/rules.yml +++ b/_data/rules.yml @@ -1539,7 +1539,7 @@ groups: for: 3m - name: Nats high memory usage description: NATS server memory usage is above 200MB for {{ $labels.instance }} - query: "gnatsd_varz_mem > 200000000" + query: "gnatsd_varz_mem > 200 * 1024 * 1024" severity: warning for: 5m - name: Nats slow consumers @@ -1549,7 +1549,7 @@ groups: for: 3m - name: Nats server down description: NATS server has been down for more than 5 minutes - query: "absent(gnatsd_connz_total)" + query: 'absent(up{job="nats"})' severity: critical for: 5m - name: Nats high CPU usage @@ -1577,8 +1577,8 @@ groups: query: "gnatsd_connz_subscriptions > 1000" severity: warning for: 5m - - name: Nats high pending messages - description: NATS server has more than 100,000 pending messages + - name: Nats high pending bytes + description: NATS server has more than 100,000 pending bytes query: "gnatsd_connz_pending_bytes > 100000" severity: warning for: 5m @@ -1598,8 +1598,8 @@ groups: severity: warning for: 5m - name: Nats max payload size exceeded - description: The max payload size allowed by NATS has been exceeded - query: "max(gnatsd_varz_max_payload) > 1000000" + description: The max payload size allowed by NATS has been exceeded (1MB) + query: "max(gnatsd_varz_max_payload) > 1024 * 1024" severity: critical for: 5m - name: Nats leaf node connection issue