From 267c3e8e70db9f39aa3f7cb697d58aaa3d790fbe Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Mon, 29 Apr 2024 22:35:43 +0200
Subject: [PATCH 01/32] Update rules.yml

---
 _data/rules.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 3dc5c15..838591e 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -199,6 +199,7 @@ groups:
                 description: "{{ $labels.instance }}: Device error with the {{ $labels.mountpoint }} filesystem"
                 query: "node_filesystem_device_error == 1"
                 severity: critical
+                for: 2m
               - name: Host inodes will fill in 24 hours
                 description: Filesystem is predicted to run out of inodes within the next 24 hours at current write rate
                 query: '(node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{fstype!="msdosfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{fstype!="msdosfs"} == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'

From b77cb3467c1de3dd45d35e8d4e459a10df544628 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Mon, 29 Apr 2024 20:36:49 +0000
Subject: [PATCH 02/32] Publish

---
 dist/rules/host-and-hardware/node-exporter.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dist/rules/host-and-hardware/node-exporter.yml b/dist/rules/host-and-hardware/node-exporter.yml
index de48231..6655ef7 100644
--- a/dist/rules/host-and-hardware/node-exporter.yml
+++ b/dist/rules/host-and-hardware/node-exporter.yml
@@ -96,7 +96,7 @@ groups:
 
     - alert: HostFilesystemDeviceError
       expr: 'node_filesystem_device_error == 1'
-      for: 0m
+      for: 2m
       labels:
         severity: critical
       annotations:

From aad1c4cd959a4713cba9e614513d20ddcaf9cd93 Mon Sep 17 00:00:00 2001
From: Sergey Shtoltz <shtoltz@users.noreply.github.com>
Date: Thu, 2 May 2024 21:48:46 +0300
Subject: [PATCH 03/32] RedisOutOfConfiguredMaxmemory: checking if memory limit
 is set (#410)

---
 _data/rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 838591e..109b0c8 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -821,7 +821,7 @@ groups:
                   The exporter must be started with --include-system-metrics flag or REDIS_EXPORTER_INCL_SYSTEM_METRICS=true environment variable.
               - name: Redis out of configured maxmemory
                 description: Redis is running out of configured maxmemory (> 90%)
-                query: "redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90"
+                query: "redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 and on(instance) redis_memory_max_bytes > 0"
                 severity: warning
                 for: 2m
               - name: Redis too many connections

From 5c0963558a1165f89bf2c0216c52512fb07602da Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Thu, 2 May 2024 18:49:56 +0000
Subject: [PATCH 04/32] Publish

---
 dist/rules/redis/oliver006-redis-exporter.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dist/rules/redis/oliver006-redis-exporter.yml b/dist/rules/redis/oliver006-redis-exporter.yml
index 08cdf23..6b4dd8d 100644
--- a/dist/rules/redis/oliver006-redis-exporter.yml
+++ b/dist/rules/redis/oliver006-redis-exporter.yml
@@ -77,7 +77,7 @@ groups:
         description: "Redis is running out of system memory (> 90%)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: RedisOutOfConfiguredMaxmemory
-      expr: 'redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90'
+      expr: 'redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 and on(instance) redis_memory_max_bytes > 0'
       for: 2m
       labels:
         severity: warning

From 59e6a9165dfb5dd23a3a3eafdf75210a818982e8 Mon Sep 17 00:00:00 2001
From: enesyalinkaya <49714068+enesyalinkaya@users.noreply.github.com>
Date: Mon, 6 May 2024 02:32:00 +0300
Subject: [PATCH 05/32] add new alerts for elasticsearch rules.yml (#411)

This commit adds new Prometheus alert definitions to monitor indexing and query metrics in Elasticsearch clusters. These alerts are essential for detecting performance issues related to indexing and querying activities.
---
 _data/rules.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/_data/rules.yml b/_data/rules.yml
index 109b0c8..744bf10 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1128,6 +1128,26 @@ groups:
                 description: No new documents for 10 min!
                 query: 'increase(elasticsearch_indices_indexing_index_total{es_data_node="true"}[10m]) < 1'
                 severity: warning
+              - name: Elasticsearch High Indexing Latency
+                description: "The indexing latency on Elasticsearch cluster is higher than the threshold."
+                query: "elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005"
+                severity: warning
+                for: 10m       
+              - name: Elasticsearch High Indexing Rate
+                description: "The indexing rate on Elasticsearch cluster is higher than the threshold."
+                query: "elasticsearch_indices_indexing_index_total > 100000"
+                severity: warning
+                for: 5m     
+              - name: Elasticsearch High Query Rate
+                description: "The query rate on Elasticsearch cluster is higher than the threshold."
+                query: "elasticsearch_indices_search_query_total > 100000"
+                severity: warning
+                for: 5m
+              - name: Elasticsearch High Query Latency
+                description: "The query latency on Elasticsearch cluster is higher than the threshold."
+                query: "elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1"
+                severity: warning
+                for: 5m                 
 
       - name: Cassandra
         exporters:

From 515fca9c10898f728c116c9816a186c9d600a5b4 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Sun, 5 May 2024 23:33:11 +0000
Subject: [PATCH 06/32] Publish

---
 ...theus-community-elasticsearch-exporter.yml | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml b/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml
index 4ed5660..9aeadec 100644
--- a/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml
+++ b/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml
@@ -138,3 +138,39 @@ groups:
       annotations:
         summary: Elasticsearch no new documents (instance {{ $labels.instance }})
         description: "No new documents for 10 min!\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ElasticsearchHighIndexingLatency
+      expr: 'elasticsearch_indices_indexing_index_time_seconds_total / elasticsearch_indices_indexing_index_total > 0.0005'
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Elasticsearch High Indexing Latency (instance {{ $labels.instance }})
+        description: "The indexing latency on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ElasticsearchHighIndexingRate
+      expr: 'elasticsearch_indices_indexing_index_total > 100000'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Elasticsearch High Indexing Rate (instance {{ $labels.instance }})
+        description: "The indexing rate on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ElasticsearchHighQueryRate
+      expr: 'elasticsearch_indices_search_query_total > 100000'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Elasticsearch High Query Rate (instance {{ $labels.instance }})
+        description: "The query rate on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ElasticsearchHighQueryLatency
+      expr: 'elasticsearch_indices_search_fetch_time_seconds / elasticsearch_indices_search_fetch_total > 1'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Elasticsearch High Query Latency (instance {{ $labels.instance }})
+        description: "The query latency on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

From 2547288c13ba21703dbd4be25e3c6e4618180255 Mon Sep 17 00:00:00 2001
From: Ali <115415312+xogoodnow@users.noreply.github.com>
Date: Mon, 13 May 2024 12:02:18 +0330
Subject: [PATCH 07/32] Added Clickhouse (#412)

* Added Clickhouse

* Update rules.yml

Added reasonable time periods for each query to avoid false positives and, in some cases, to give the system a short window to try to resolve the issue.
Also changed the severity level of authentication alerts from critical to info, which seems more appropriate.

* Modified time periods for alerts in embedded-exporter.yml

I made a few adjustments to the time periods.
Please check whether they seem reasonable.

* Replication alerts time periods were adjusted

IMHO, replication alerts must be sent right away.
---
 _data/rules.yml                             |  82 ++++++++++++
 dist/rules/clickhouse/embedded-exporter.yml | 131 ++++++++++++++++++++
 2 files changed, 213 insertions(+)
 create mode 100644 dist/rules/clickhouse/embedded-exporter.yml

diff --git a/_data/rules.yml b/_data/rules.yml
index 744bf10..ffb4604 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1300,6 +1300,88 @@ groups:
                 severity: critical
                 for: 2m
 
+      - name: Clickhouse
+        exporters:
+          - name: Embedded Exporter
+            slug: embedded-exporter
+            doc_url: https://clickhouse.com/docs/en/operations/system-tables/metrics
+            rules:
+              - name: ClickHouse Memory Usage Critical
+                description: Memory usage is critically high, over 90%.
+                query: "ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 90"
+                severity: critical
+                for: 5m
+              - name: ClickHouse Memory Usage Warning
+                description: Memory usage is over 80%.
+                query: "ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 80"
+                severity: warning
+                for: 5m
+              - name: ClickHouse Disk Space Low on Default
+                description: Disk space on default is below 20%.
+                query: "ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 20"
+                severity: warning
+                for: 2m
+              - name: ClickHouse Disk Space Critical on Default
+                description: Disk space on default disk is critically low, below 10%.
+                query: "ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 10"
+                severity: critical
+                for: 2m
+              - name: ClickHouse Disk Space Low on Backups
+                description: Disk space on backups is below 20%.
+                query: "ClickHouseAsyncMetrics_DiskAvailable_backups / (ClickHouseAsyncMetrics_DiskAvailable_backups + ClickHouseAsyncMetrics_DiskUsed_backups) * 100 < 20"
+                severity: warning
+                for: 2m
+              - name: ClickHouse Replica Errors
+                description: Critical replica errors detected, either all replicas are stale or lost.
+                query: "ClickHouseErrorMetric_ALL_REPLICAS_ARE_STALE == 1 or ClickHouseErrorMetric_ALL_REPLICAS_LOST == 1"
+                severity: critical
+                for: 0m
+              - name: ClickHouse No Available Replicas
+                description: No available replicas in ClickHouse.
+                query: "ClickHouseErrorMetric_NO_AVAILABLE_REPLICA == 1"
+                severity: critical
+                for: 0m
+              - name: ClickHouse No Live Replicas
+                description: There are too few live replicas available, risking data loss and service disruption.
+                query: "ClickHouseErrorMetric_TOO_FEW_LIVE_REPLICAS == 1"
+                severity: critical
+                for: 0m
+              - name: ClickHouse High Network Traffic
+                description: Network traffic is unusually high, may affect cluster performance.
+                query: "ClickHouseMetrics_NetworkSend > 250 or ClickHouseMetrics_NetworkReceive > 250"
+                severity: warning
+                for: 5m
+                comments: |
+                  Please replace the threshold with an appropriate value
+              - name: ClickHouse High TCP Connections
+                description: High number of TCP connections, indicating heavy client or inter-cluster communication.
+                query: "ClickHouseMetrics_TCPConnection > 400"
+                severity: warning
+                for: 5m
+                comments: |
+                  Please replace the threshold with an appropriate value
+              - name: ClickHouse Interserver Connection Issues
+                description: An increase in interserver connections may indicate replication or distributed query handling issues.
+                query: "increase(ClickHouseMetrics_InterserverConnection[5m]) > 0"
+                severity: warning
+                for: 1m
+              - name: ClickHouse ZooKeeper Connection Issues
+                description: ClickHouse is experiencing issues with ZooKeeper connections, which may affect cluster state and coordination.
+                query: "avg(ClickHouseMetrics_ZooKeeperSession) != 1"
+                severity: warning
+                for: 3m
+              - name: ClickHouse Authentication Failures
+                description: Authentication failures detected, indicating potential security issues or misconfiguration.
+                query: "increase(ClickHouseErrorMetric_AUTHENTICATION_FAILED[5m]) > 0"
+                severity: info
+                for: 0m
+              - name: ClickHouse Access Denied Errors
+                description: Access denied errors have been logged, which could indicate permission issues or unauthorized access attempts.
+                query: "increase(ClickHouseErrorMetric_RESOURCE_ACCESS_DENIED[5m]) > 0"
+                severity: info
+                for: 0m
+
+
       - name: Zookeeper
         exporters:
           - name: cloudflare/kafka_zookeeper_exporter
diff --git a/dist/rules/clickhouse/embedded-exporter.yml b/dist/rules/clickhouse/embedded-exporter.yml
new file mode 100644
index 0000000..19917bb
--- /dev/null
+++ b/dist/rules/clickhouse/embedded-exporter.yml
@@ -0,0 +1,131 @@
+groups:
+- name: EmbeddedExporter
+  rules:
+    - alert: ClickHouseMemoryUsageCritical
+      expr: 'ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 90'
+      for: 5m
+      labels:
+        severity: critical
+      annotations:
+        summary: ClickHouse Memory Usage Critical (instance {{ $labels.instance }})
+        description: "Memory usage is critically high, over 90%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseMemoryUsageWarning
+      expr: 'ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 80'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: ClickHouse Memory Usage Warning (instance {{ $labels.instance }})
+        description: "Memory usage is over 80%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseDiskSpaceLowDefault
+      expr: 'ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 20'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: ClickHouse Disk Space Low on Default (instance {{ $labels.instance }})
+        description: "Disk space on default is below 20%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseDiskSpaceCriticalDefault
+      expr: 'ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 10'
+      for: 2m
+      labels:
+        severity: critical
+      annotations:
+        summary: ClickHouse Disk Space Critical on Default Disk (instance {{ $labels.instance }})
+        description: "Disk space on default disk is critically low, below 10%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseDiskSpaceLowBackups
+      expr: 'ClickHouseAsyncMetrics_DiskAvailable_backups / (ClickHouseAsyncMetrics_DiskAvailable_backups + ClickHouseAsyncMetrics_DiskUsed_backups) * 100 < 20'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: ClickHouse Disk Space Low on Backups (instance {{ $labels.instance }})
+        description: "Disk space on backups is below 20%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseReplicaErrors
+      expr: 'ClickHouseErrorMetric_ALL_REPLICAS_ARE_STALE == 1 or ClickHouseErrorMetric_ALL_REPLICAS_LOST == 1'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: ClickHouse Replica Errors Detected (instance {{ $labels.instance }})
+        description: "Critical replica errors detected, either all replicas are stale or lost.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseNoAvailableReplicas
+      expr: 'ClickHouseErrorMetric_NO_AVAILABLE_REPLICA == 1'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: No Available Replicas in ClickHouse (instance {{ $labels.instance }})
+        description: "No available replicas in ClickHouse.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseNoLiveReplicas
+      expr: 'ClickHouseErrorMetric_TOO_FEW_LIVE_REPLICAS == 1'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: No Live Replicas in ClickHouse (instance {{ $labels.instance }})
+        description: "There are too few live replicas available, risking data loss and service disruption.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+
+    - alert: ClickHouseNetworkUsageHigh
+      expr: 'ClickHouseMetrics_NetworkSend > 1000 or ClickHouseMetrics_NetworkReceive > 1000'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: High Network Traffic in ClickHouse (instance {{ $labels.instance }})
+        description: "Network traffic is unusually high, may affect cluster performance.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseHighTCPConnections
+      expr: 'ClickHouseMetrics_TCPConnection > 1500'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: High TCP Connections in ClickHouse (instance {{ $labels.instance }})
+        description: "High number of TCP connections, indicating heavy client or inter-cluster communication.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseInterserverConnectionIssues
+      expr: 'increase(ClickHouseMetrics_InterserverConnection[5m]) > 0'
+      for: 0m
+      labels:
+        severity: warning
+      annotations:
+        summary: Interserver Connection Issues in ClickHouse (instance {{ $labels.instance }})
+        description: "An increase in interserver connections may indicate replication or distributed query handling issues.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseZooKeeperConnectionIssues
+      expr: 'avg(ClickHouseMetrics_ZooKeeperSession) != 1'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: ZooKeeper Connection Issues in ClickHouse (instance {{ $labels.instance }})
+        description: "ClickHouse is experiencing issues with ZooKeeper connections, which may affect cluster state and coordination.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseAuthenticationFailures
+      expr: 'increase(ClickHouseErrorMetric_AUTHENTICATION_FAILED[5m]) > 0'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Authentication Failures in ClickHouse (instance {{ $labels.instance }})
+        description: "Authentication failures detected, indicating potential security issues or misconfiguration.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: ClickHouseAccessDeniedErrors
+      expr: 'increase(ClickHouseErrorMetric_RESOURCE_ACCESS_DENIED[5m]) > 0'
+      for: 1m
+      labels:
+        severity: critical
+      annotations:
+        summary: Access Denied Errors in ClickHouse (instance {{ $labels.instance }})
+        description: "Access denied errors have been logged, which could indicate permission issues or unauthorized access attempts.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+

From 84b0569c97975361b600f25aa90d5fc1e583bd87 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Mon, 13 May 2024 08:33:30 +0000
Subject: [PATCH 08/32] Publish

---
 dist/rules/clickhouse/embedded-exporter.yml | 74 ++++++++++-----------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/dist/rules/clickhouse/embedded-exporter.yml b/dist/rules/clickhouse/embedded-exporter.yml
index 19917bb..3efe551 100644
--- a/dist/rules/clickhouse/embedded-exporter.yml
+++ b/dist/rules/clickhouse/embedded-exporter.yml
@@ -1,7 +1,10 @@
 groups:
+
 - name: EmbeddedExporter
+
   rules:
-    - alert: ClickHouseMemoryUsageCritical
+
+    - alert: ClickhouseMemoryUsageCritical
       expr: 'ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 90'
       for: 5m
       labels:
@@ -10,122 +13,119 @@ groups:
         summary: ClickHouse Memory Usage Critical (instance {{ $labels.instance }})
         description: "Memory usage is critically high, over 90%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseMemoryUsageWarning
+    - alert: ClickhouseMemoryUsageWarning
       expr: 'ClickHouseAsyncMetrics_CGroupMemoryUsed / ClickHouseAsyncMetrics_CGroupMemoryTotal * 100 > 80'
       for: 5m
       labels:
         severity: warning
       annotations:
         summary: ClickHouse Memory Usage Warning (instance {{ $labels.instance }})
-        description: "Memory usage is over 80%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        description: "Memory usage is over 80%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseDiskSpaceLowDefault
+    - alert: ClickhouseDiskSpaceLowOnDefault
       expr: 'ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 20'
       for: 2m
       labels:
         severity: warning
       annotations:
         summary: ClickHouse Disk Space Low on Default (instance {{ $labels.instance }})
-        description: "Disk space on default is below 20%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        description: "Disk space on default is below 20%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseDiskSpaceCriticalDefault
+    - alert: ClickhouseDiskSpaceCriticalOnDefault
       expr: 'ClickHouseAsyncMetrics_DiskAvailable_default / (ClickHouseAsyncMetrics_DiskAvailable_default + ClickHouseAsyncMetrics_DiskUsed_default) * 100 < 10'
       for: 2m
       labels:
         severity: critical
       annotations:
-        summary: ClickHouse Disk Space Critical on Default Disk (instance {{ $labels.instance }})
+        summary: ClickHouse Disk Space Critical on Default (instance {{ $labels.instance }})
         description: "Disk space on default disk is critically low, below 10%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseDiskSpaceLowBackups
+    - alert: ClickhouseDiskSpaceLowOnBackups
       expr: 'ClickHouseAsyncMetrics_DiskAvailable_backups / (ClickHouseAsyncMetrics_DiskAvailable_backups + ClickHouseAsyncMetrics_DiskUsed_backups) * 100 < 20'
       for: 2m
       labels:
         severity: warning
       annotations:
         summary: ClickHouse Disk Space Low on Backups (instance {{ $labels.instance }})
-        description: "Disk space on backups is below 20%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        description: "Disk space on backups is below 20%.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseReplicaErrors
+    - alert: ClickhouseReplicaErrors
       expr: 'ClickHouseErrorMetric_ALL_REPLICAS_ARE_STALE == 1 or ClickHouseErrorMetric_ALL_REPLICAS_LOST == 1'
       for: 0m
       labels:
         severity: critical
       annotations:
-        summary: ClickHouse Replica Errors Detected (instance {{ $labels.instance }})
+        summary: ClickHouse Replica Errors (instance {{ $labels.instance }})
         description: "Critical replica errors detected, either all replicas are stale or lost.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseNoAvailableReplicas
+    - alert: ClickhouseNoAvailableReplicas
       expr: 'ClickHouseErrorMetric_NO_AVAILABLE_REPLICA == 1'
       for: 0m
       labels:
         severity: critical
       annotations:
-        summary: No Available Replicas in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse No Available Replicas (instance {{ $labels.instance }})
         description: "No available replicas in ClickHouse.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseNoLiveReplicas
+    - alert: ClickhouseNoLiveReplicas
       expr: 'ClickHouseErrorMetric_TOO_FEW_LIVE_REPLICAS == 1'
       for: 0m
       labels:
         severity: critical
       annotations:
-        summary: No Live Replicas in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse No Live Replicas (instance {{ $labels.instance }})
         description: "There are too few live replicas available, risking data loss and service disruption.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-
-    - alert: ClickHouseNetworkUsageHigh
-      expr: 'ClickHouseMetrics_NetworkSend > 1000 or ClickHouseMetrics_NetworkReceive > 1000'
+    - alert: ClickhouseHighNetworkTraffic
+      expr: 'ClickHouseMetrics_NetworkSend > 250 or ClickHouseMetrics_NetworkReceive > 250'
       for: 5m
       labels:
         severity: warning
       annotations:
-        summary: High Network Traffic in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse High Network Traffic (instance {{ $labels.instance }})
         description: "Network traffic is unusually high, may affect cluster performance.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseHighTCPConnections
-      expr: 'ClickHouseMetrics_TCPConnection > 1500'
+    - alert: ClickhouseHighTcpConnections
+      expr: 'ClickHouseMetrics_TCPConnection > 400'
       for: 5m
       labels:
         severity: warning
       annotations:
-        summary: High TCP Connections in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse High TCP Connections (instance {{ $labels.instance }})
         description: "High number of TCP connections, indicating heavy client or inter-cluster communication.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseInterserverConnectionIssues
+    - alert: ClickhouseInterserverConnectionIssues
       expr: 'increase(ClickHouseMetrics_InterserverConnection[5m]) > 0'
-      for: 0m
+      for: 1m
       labels:
         severity: warning
       annotations:
-        summary: Interserver Connection Issues in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse Interserver Connection Issues (instance {{ $labels.instance }})
         description: "An increase in interserver connections may indicate replication or distributed query handling issues.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseZooKeeperConnectionIssues
+    - alert: ClickhouseZookeeperConnectionIssues
       expr: 'avg(ClickHouseMetrics_ZooKeeperSession) != 1'
-      for: 5m
+      for: 3m
       labels:
         severity: warning
       annotations:
-        summary: ZooKeeper Connection Issues in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse ZooKeeper Connection Issues (instance {{ $labels.instance }})
         description: "ClickHouse is experiencing issues with ZooKeeper connections, which may affect cluster state and coordination.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseAuthenticationFailures
+    - alert: ClickhouseAuthenticationFailures
       expr: 'increase(ClickHouseErrorMetric_AUTHENTICATION_FAILED[5m]) > 0'
       for: 0m
       labels:
-        severity: critical
+        severity: info
       annotations:
-        summary: Authentication Failures in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse Authentication Failures (instance {{ $labels.instance }})
         description: "Authentication failures detected, indicating potential security issues or misconfiguration.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: ClickHouseAccessDeniedErrors
+    - alert: ClickhouseAccessDeniedErrors
       expr: 'increase(ClickHouseErrorMetric_RESOURCE_ACCESS_DENIED[5m]) > 0'
-      for: 1m
+      for: 0m
       labels:
-        severity: critical
+        severity: info
       annotations:
-        summary: Access Denied Errors in ClickHouse (instance {{ $labels.instance }})
+        summary: ClickHouse Access Denied Errors (instance {{ $labels.instance }})
         description: "Access denied errors have been logged, which could indicate permission issues or unauthorized access attempts.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
-
-

From 847143ecc94909ff05ebff39b83a91272122a68b Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Mon, 13 May 2024 10:42:04 +0200
Subject: [PATCH 09/32] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c5c241c..16c92c7 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
 - [RabbitMQ](https://samber.github.io/awesome-prometheus-alerts/rules#rabbitmq)
 - [Elasticsearch](https://samber.github.io/awesome-prometheus-alerts/rules#elasticsearch)
 - [Cassandra](https://samber.github.io/awesome-prometheus-alerts/rules#cassandra)
+- [Clickhouse](https://samber.github.io/awesome-prometheus-alerts/rules#clickhouse)
 - [Zookeeper](https://samber.github.io/awesome-prometheus-alerts/rules#zookeeper)
 - [Kafka](https://samber.github.io/awesome-prometheus-alerts/rules#kafka)
 - [Pulsar](https://samber.github.io/awesome-prometheus-alerts/rules#pulsar)

From 870bbd47d2d9b09dee7e286961c3b3043976a76c Mon Sep 17 00:00:00 2001
From: Vijay Dharap <VDHARAP@volvocars.com>
Date: Mon, 13 May 2024 09:10:55 +0000
Subject: [PATCH 10/32] Fixed HPA rule to use more correct condition (#408)

* Fixed HPA rule to use more correct condition

* Update rules.yml

---------

Co-authored-by: Samuel Berthe <dev@samuel-berthe.fr>
---
 _data/rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index ffb4604..3f29fb0 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1981,7 +1981,7 @@ groups:
                 for: 1m
               - name: Kubernetes HPA scale inability
                 description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is unable to scale
-                query: 'kube_horizontalpodautoscaler_status_condition{status="false", condition="AbleToScale"} == 1'
+                query: '(kube_horizontalpodautoscaler_spec_max_replicas - kube_horizontalpodautoscaler_status_desired_replicas) * on (horizontalpodautoscaler,namespace) (kube_horizontalpodautoscaler_status_condition{condition="ScalingLimited", status="true"} == 1) == 0'
                 severity: warning
                 for: 2m
               - name: Kubernetes HPA metrics unavailability

From 613401a9600b5d8f31ec0a4890371b978becee62 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Mon, 13 May 2024 09:12:01 +0000
Subject: [PATCH 11/32] Publish

---
 dist/rules/kubernetes/kubestate-exporter.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml
index e43a1fb..9014275 100644
--- a/dist/rules/kubernetes/kubestate-exporter.yml
+++ b/dist/rules/kubernetes/kubestate-exporter.yml
@@ -122,7 +122,7 @@ groups:
         description: "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} went down\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaScaleInability
-      expr: 'kube_horizontalpodautoscaler_status_condition{status="false", condition="AbleToScale"} == 1'
+      expr: '(kube_horizontalpodautoscaler_spec_max_replicas - kube_horizontalpodautoscaler_status_desired_replicas) * on (horizontalpodautoscaler,namespace) (kube_horizontalpodautoscaler_status_condition{condition="ScalingLimited", status="true"} == 1) == 0'
       for: 2m
       labels:
         severity: warning

From 396083a2a1daabeb6b7b60a29c5c0ec3eef215b7 Mon Sep 17 00:00:00 2001
From: Florian Schlichting <fsfs@debian.org>
Date: Mon, 13 May 2024 12:09:04 +0200
Subject: [PATCH 12/32] Fix HaproxyBackendMaxActiveSession: look at current /
 limit (#413)

haproxy_backend_max_sessions is the maximum number of sessions ever encountered during the lifetime of the HAProxy process. That is, it will never go down until HAProxy is restarted, so the alert continues to fire even though the situation has cleared!

This doesn't make sense. Look at the currently active sessions instead.
---
 _data/rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 3f29fb0..61425a5 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1785,7 +1785,7 @@ groups:
                 severity: critical
               - name: HAProxy backend max active session
                 description: HAproxy backend {{ $labels.fqdn }}/{{ $labels.backend }} is reaching session limit (> 80%).
-                query: "((sum by (backend) (avg_over_time(haproxy_backend_max_sessions[2m]) * 100) / sum by (backend) (avg_over_time(haproxy_backend_limit_sessions[2m])))) > 80"
+                query: "((sum by (backend) (avg_over_time(haproxy_backend_current_sessions[2m]) * 100) / sum by (backend) (avg_over_time(haproxy_backend_limit_sessions[2m])))) > 80"
                 severity: warning
                 for: 2m
               - name: HAProxy pending requests

From 04886da968b4686de0af0c19f0fb4baa05f8265e Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Mon, 13 May 2024 10:10:12 +0000
Subject: [PATCH 13/32] Publish

---
 dist/rules/haproxy/haproxy-exporter-v1.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dist/rules/haproxy/haproxy-exporter-v1.yml b/dist/rules/haproxy/haproxy-exporter-v1.yml
index 2b2f93f..7be81a0 100644
--- a/dist/rules/haproxy/haproxy-exporter-v1.yml
+++ b/dist/rules/haproxy/haproxy-exporter-v1.yml
@@ -77,7 +77,7 @@ groups:
         description: "Too many connection errors to {{ $labels.server }} server (> 100 req/s). Request throughput may be too high.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: HaproxyBackendMaxActiveSession
-      expr: '((sum by (backend) (avg_over_time(haproxy_backend_max_sessions[2m]) * 100) / sum by (backend) (avg_over_time(haproxy_backend_limit_sessions[2m])))) > 80'
+      expr: '((sum by (backend) (avg_over_time(haproxy_backend_current_sessions[2m]) * 100) / sum by (backend) (avg_over_time(haproxy_backend_limit_sessions[2m])))) > 80'
       for: 2m
       labels:
         severity: warning

From 4963331101e42b4a799978891a5c9c5f927623fb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 14 May 2024 01:41:57 +0200
Subject: [PATCH 14/32] build(deps-dev): bump nokogiri from 1.16.2 to 1.16.5
 (#415)

Bumps [nokogiri](https://github.com/sparklemotion/nokogiri) from 1.16.2 to 1.16.5.
- [Release notes](https://github.com/sparklemotion/nokogiri/releases)
- [Changelog](https://github.com/sparklemotion/nokogiri/blob/main/CHANGELOG.md)
- [Commits](https://github.com/sparklemotion/nokogiri/compare/v1.16.2...v1.16.5)

---
updated-dependencies:
- dependency-name: nokogiri
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Gemfile.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Gemfile.lock b/Gemfile.lock
index df07b1f..2f8e470 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -231,7 +231,7 @@ GEM
       jekyll-seo-tag (~> 2.1)
     minitest (5.17.0)
     multipart-post (2.1.1)
-    nokogiri (1.16.2-x86_64-linux)
+    nokogiri (1.16.5-x86_64-linux)
       racc (~> 1.4)
     octokit (4.22.0)
       faraday (>= 0.9)

From 8460f9008e1eb191bb62d445ea17709698ce63db Mon Sep 17 00:00:00 2001
From: "R.Sicart" <roger.sicart@gmail.com>
Date: Tue, 14 May 2024 20:34:43 +0200
Subject: [PATCH 15/32] fix: some kube api alert lint (#416)

* fix: apiserver regexp matchers are automatically fully anchored

Signed-off-by: R.Sicart <roger.sicart@gmail.com>

* fix: apiserver errors alert is using the `instance` label but the query removes it

Signed-off-by: R.Sicart <roger.sicart@gmail.com>

* fix: apiserver latency alert is using the `resource` label but the query removes it

Signed-off-by: R.Sicart <roger.sicart@gmail.com>

---------

Signed-off-by: R.Sicart <roger.sicart@gmail.com>
---
 _data/rules.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 61425a5..eb87723 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2071,7 +2071,7 @@ groups:
                 for: 12h
               - name: Kubernetes API server errors
                 description: Kubernetes API server is experiencing high error rate
-                query: 'sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[1m])) / sum(rate(apiserver_request_total{job="apiserver"}[1m])) * 100 > 3'
+                query: 'sum(rate(apiserver_request_total{job="apiserver",code=~"(?:5..)"}[1m])) by (instance, job) / sum(rate(apiserver_request_total{job="apiserver"}[1m])) by (instance, job) * 100 > 3'
                 severity: critical
                 for: 2m
               - name: Kubernetes API client errors
@@ -2089,7 +2089,7 @@ groups:
                 severity: critical
               - name: Kubernetes API server latency
                 description: "Kubernetes API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}."
-                query: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{subresource!="log",verb!~"^(?:CONNECT|WATCHLIST|WATCH|PROXY)$"} [10m])) WITHOUT (instance, resource)) > 1'
+                query: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~"(?:CONNECT|WATCHLIST|WATCH|PROXY)"} [10m])) WITHOUT (subresource)) > 1'
                 severity: warning
                 for: 2m
 

From 81079a2a7e9923ce369b35301ddcbf660bea1f09 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Tue, 14 May 2024 18:35:54 +0000
Subject: [PATCH 16/32] Publish

---
 dist/rules/kubernetes/kubestate-exporter.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml
index 9014275..3f9dc6a 100644
--- a/dist/rules/kubernetes/kubestate-exporter.yml
+++ b/dist/rules/kubernetes/kubestate-exporter.yml
@@ -266,7 +266,7 @@ groups:
         description: "Kubernetes Job {{ $labels.namespace }}/{{ $labels.job_name }} did not complete in time.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesApiServerErrors
-      expr: 'sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[1m])) / sum(rate(apiserver_request_total{job="apiserver"}[1m])) * 100 > 3'
+      expr: 'sum(rate(apiserver_request_total{job="apiserver",code=~"(?:5..)"}[1m])) by (instance, job) / sum(rate(apiserver_request_total{job="apiserver"}[1m])) by (instance, job) * 100 > 3'
       for: 2m
       labels:
         severity: critical
@@ -302,7 +302,7 @@ groups:
         description: "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesApiServerLatency
-      expr: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{subresource!="log",verb!~"^(?:CONNECT|WATCHLIST|WATCH|PROXY)$"} [10m])) WITHOUT (instance, resource)) > 1'
+      expr: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~"(?:CONNECT|WATCHLIST|WATCH|PROXY)"} [10m])) WITHOUT (subresource)) > 1'
       for: 2m
       labels:
         severity: warning

From 262e45162569863ea1784dcfc68066be32fcbe71 Mon Sep 17 00:00:00 2001
From: "R.Sicart" <roger.sicart@gmail.com>
Date: Tue, 14 May 2024 20:43:00 +0200
Subject: [PATCH 17/32] kube hpa lint and improvement (#417)

* fix: hpa alerts are using the `instance` label but the queries remove it

Signed-off-by: R.Sicart <roger.sicart@gmail.com>

* fix: hpa alert is using the `namespace` label but the query removes it

Signed-off-by: R.Sicart <roger.sicart@gmail.com>

* feat: hpa scale max should not alert when min and max are the same

Signed-off-by: R.Sicart <roger.sicart@gmail.com>

---------

Signed-off-by: R.Sicart <roger.sicart@gmail.com>
---
 _data/rules.yml                              |  2 +-
 dist/rules/kubernetes/kubestate-exporter.yml | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index eb87723..4d0d9f4 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1990,7 +1990,7 @@ groups:
                 severity: warning
               - name: Kubernetes HPA scale maximum
                 description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has hit maximum number of desired pods
-                query: "kube_horizontalpodautoscaler_status_desired_replicas >= kube_horizontalpodautoscaler_spec_max_replicas"
+                query: '(kube_horizontalpodautoscaler_status_desired_replicas >= kube_horizontalpodautoscaler_spec_max_replicas) and (kube_horizontalpodautoscaler_spec_max_replicas > 1) and (kube_horizontalpodautoscaler_spec_min_replicas != kube_horizontalpodautoscaler_spec_max_replicas)'
                 severity: info
                 for: 2m
               - name: Kubernetes HPA underutilized
diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml
index 3f9dc6a..8684fdf 100644
--- a/dist/rules/kubernetes/kubestate-exporter.yml
+++ b/dist/rules/kubernetes/kubestate-exporter.yml
@@ -127,7 +127,7 @@ groups:
       labels:
         severity: warning
       annotations:
-        summary: Kubernetes HPA scale inability (instance {{ $labels.instance }})
+        summary: Kubernetes HPA scale inability ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is unable to scale\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaMetricsUnavailability
@@ -136,7 +136,7 @@ groups:
       labels:
         severity: warning
       annotations:
-        summary: Kubernetes HPA metrics unavailability (instance {{ $labels.instance }})
+        summary: Kubernetes HPA metrics unavailability ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is unable to collect metrics\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaScaleMaximum
@@ -145,16 +145,16 @@ groups:
       labels:
         severity: info
       annotations:
-        summary: Kubernetes HPA scale maximum (instance {{ $labels.instance }})
+        summary: Kubernetes HPA scale maximum ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has hit maximum number of desired pods\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaUnderutilized
-      expr: 'max(quantile_over_time(0.5, kube_horizontalpodautoscaler_status_desired_replicas[1d]) == kube_horizontalpodautoscaler_spec_min_replicas) by (horizontalpodautoscaler) > 3'
+      expr: 'max(quantile_over_time(0.5, kube_horizontalpodautoscaler_status_desired_replicas[1d]) == kube_horizontalpodautoscaler_spec_min_replicas) by (horizontalpodautoscaler, namespace) > 3'
       for: 0m
       labels:
         severity: info
       annotations:
-        summary: Kubernetes HPA underutilized (instance {{ $labels.instance }})
+        summary: Kubernetes HPA underutilized ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is constantly at minimum replicas for 50% of the time. Potential cost saving here.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesPodNotHealthy

From 826be5877ffc129ca3511cd25af87d67ea67fb48 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Tue, 14 May 2024 18:44:11 +0000
Subject: [PATCH 18/32] Publish

---
 dist/rules/kubernetes/kubestate-exporter.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/dist/rules/kubernetes/kubestate-exporter.yml b/dist/rules/kubernetes/kubestate-exporter.yml
index 8684fdf..2db1d64 100644
--- a/dist/rules/kubernetes/kubestate-exporter.yml
+++ b/dist/rules/kubernetes/kubestate-exporter.yml
@@ -127,7 +127,7 @@ groups:
       labels:
         severity: warning
       annotations:
-        summary: Kubernetes HPA scale inability ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
+        summary: Kubernetes HPA scale inability (instance {{ $labels.instance }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is unable to scale\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaMetricsUnavailability
@@ -136,25 +136,25 @@ groups:
       labels:
         severity: warning
       annotations:
-        summary: Kubernetes HPA metrics unavailability ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
+        summary: Kubernetes HPA metrics unavailability (instance {{ $labels.instance }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is unable to collect metrics\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaScaleMaximum
-      expr: 'kube_horizontalpodautoscaler_status_desired_replicas >= kube_horizontalpodautoscaler_spec_max_replicas'
+      expr: '(kube_horizontalpodautoscaler_status_desired_replicas >= kube_horizontalpodautoscaler_spec_max_replicas) and (kube_horizontalpodautoscaler_spec_max_replicas > 1) and (kube_horizontalpodautoscaler_spec_min_replicas != kube_horizontalpodautoscaler_spec_max_replicas)'
       for: 2m
       labels:
         severity: info
       annotations:
-        summary: Kubernetes HPA scale maximum ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
+        summary: Kubernetes HPA scale maximum (instance {{ $labels.instance }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has hit maximum number of desired pods\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesHpaUnderutilized
-      expr: 'max(quantile_over_time(0.5, kube_horizontalpodautoscaler_status_desired_replicas[1d]) == kube_horizontalpodautoscaler_spec_min_replicas) by (horizontalpodautoscaler, namespace) > 3'
+      expr: 'max(quantile_over_time(0.5, kube_horizontalpodautoscaler_status_desired_replicas[1d]) == kube_horizontalpodautoscaler_spec_min_replicas) by (horizontalpodautoscaler) > 3'
       for: 0m
       labels:
         severity: info
       annotations:
-        summary: Kubernetes HPA underutilized ({{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }})
+        summary: Kubernetes HPA underutilized (instance {{ $labels.instance }})
         description: "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} is constantly at minimum replicas for 50% of the time. Potential cost saving here.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: KubernetesPodNotHealthy

From 9877561b6cfc2c145db1b7de6f85c084989be10d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Enes=20Yal=C4=B1nkaya?=
 <49714068+enesyalinkaya@users.noreply.github.com>
Date: Wed, 15 May 2024 09:07:55 +0300
Subject: [PATCH 19/32] fix elasticsearch rate rules (#418)

* fix elasticsearch rate rules

* fix

* fix

* fix
---
 _data/rules.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 4d0d9f4..bf8ee70 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1135,12 +1135,12 @@ groups:
                 for: 10m       
               - name: Elasticsearch High Indexing Rate
                 description: "The indexing rate on Elasticsearch cluster is higher than the threshold."
-                query: "elasticsearch_indices_indexing_index_total > 100000"
+                query: "sum(rate(elasticsearch_indices_indexing_index_total[1m]))> 100000"
                 severity: warning
                 for: 5m     
               - name: Elasticsearch High Query Rate
                 description: "The query rate on Elasticsearch cluster is higher than the threshold."
-                query: "elasticsearch_indices_search_query_total > 100000"
+                query: "sum(rate(elasticsearch_indices_search_query_total[1m])) > 100000"
                 severity: warning
                 for: 5m
               - name: Elasticsearch High Query Latency

From 1adecd9ee79ce65ec546ad77d6d47ab04259f689 Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Wed, 15 May 2024 08:08:58 +0200
Subject: [PATCH 20/32] Update rules.yml

---
 _data/rules.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index bf8ee70..a41d8a2 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -1125,7 +1125,7 @@ groups:
                 severity: warning
                 for: 15m
               - name: Elasticsearch no new documents
-                description: No new documents for 10 min!
+                description: "No new documents for 10 min!"
                 query: 'increase(elasticsearch_indices_indexing_index_total{es_data_node="true"}[10m]) < 1'
                 severity: warning
               - name: Elasticsearch High Indexing Latency
@@ -1135,12 +1135,12 @@ groups:
                 for: 10m       
               - name: Elasticsearch High Indexing Rate
                 description: "The indexing rate on Elasticsearch cluster is higher than the threshold."
-                query: "sum(rate(elasticsearch_indices_indexing_index_total[1m]))> 100000"
+                query: "sum(rate(elasticsearch_indices_indexing_index_total[1m]))> 10000"
                 severity: warning
                 for: 5m     
               - name: Elasticsearch High Query Rate
                 description: "The query rate on Elasticsearch cluster is higher than the threshold."
-                query: "sum(rate(elasticsearch_indices_search_query_total[1m])) > 100000"
+                query: "sum(rate(elasticsearch_indices_search_query_total[1m])) > 100"
                 severity: warning
                 for: 5m
               - name: Elasticsearch High Query Latency

From 7dd767c4b4e4bcc2fa8d2b86cee89402f80acacd Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Wed, 15 May 2024 06:10:06 +0000
Subject: [PATCH 21/32] Publish

---
 .../prometheus-community-elasticsearch-exporter.yml           | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml b/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml
index 9aeadec..5e6bb9d 100644
--- a/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml
+++ b/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml
@@ -149,7 +149,7 @@ groups:
         description: "The indexing latency on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: ElasticsearchHighIndexingRate
-      expr: 'elasticsearch_indices_indexing_index_total > 100000'
+      expr: 'sum(rate(elasticsearch_indices_indexing_index_total[1m]))> 10000'
       for: 5m
       labels:
         severity: warning
@@ -158,7 +158,7 @@ groups:
         description: "The indexing rate on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: ElasticsearchHighQueryRate
-      expr: 'elasticsearch_indices_search_query_total > 100000'
+      expr: 'sum(rate(elasticsearch_indices_search_query_total[1m])) > 100'
       for: 5m
       labels:
         severity: warning

From 61a40270d96ba9d7ae6489254f9dad2775f8e00c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 16 May 2024 23:28:17 +0200
Subject: [PATCH 22/32] build(deps-dev): bump rexml from 3.2.5 to 3.2.8 (#420)

Bumps [rexml](https://github.com/ruby/rexml) from 3.2.5 to 3.2.8.
- [Release notes](https://github.com/ruby/rexml/releases)
- [Changelog](https://github.com/ruby/rexml/blob/master/NEWS.md)
- [Commits](https://github.com/ruby/rexml/compare/v3.2.5...v3.2.8)

---
updated-dependencies:
- dependency-name: rexml
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Gemfile.lock | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Gemfile.lock b/Gemfile.lock
index 2f8e470..ca3c33c 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -243,7 +243,8 @@ GEM
     rb-fsevent (0.11.1)
     rb-inotify (0.10.1)
       ffi (~> 1.0)
-    rexml (3.2.5)
+    rexml (3.2.8)
+      strscan (>= 3.0.9)
     rouge (3.26.0)
     ruby2_keywords (0.0.5)
     rubyzip (2.3.2)
@@ -258,6 +259,7 @@ GEM
       faraday (> 0.8, < 2.0)
     simpleidn (0.2.1)
       unf (~> 0.1.4)
+    strscan (3.1.0)
     terminal-table (1.8.0)
       unicode-display_width (~> 1.1, >= 1.1.1)
     thread_safe (0.3.6)

From 9b0ac7d230f18f7a371ece02d7ecb7724007faa9 Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Thu, 23 May 2024 14:44:45 +0200
Subject: [PATCH 23/32] Update rules.yml

---
 _data/rules.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index a41d8a2..cd0123e 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -876,11 +876,6 @@ groups:
                 query: 'avg by(instance) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(instance) (sum (mongodb_ss_connections) by (instance)) * 100 > 80'
                 severity: warning
                 for: 2m
-              - name: MongoDB virtual memory usage
-                description: High memory usage
-                query: "(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3"
-                severity: warning
-                for: 2m
 
           - name: dcu/mongodb_exporter
             slug: dcu-mongodb-exporter

From 8759c50440e0c95363c739432e8c3a21b864c8e0 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Thu, 23 May 2024 12:45:56 +0000
Subject: [PATCH 24/32] Publish

---
 dist/rules/mongodb/percona-mongodb-exporter.yml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/dist/rules/mongodb/percona-mongodb-exporter.yml b/dist/rules/mongodb/percona-mongodb-exporter.yml
index 3e1e5e9..1bd446f 100644
--- a/dist/rules/mongodb/percona-mongodb-exporter.yml
+++ b/dist/rules/mongodb/percona-mongodb-exporter.yml
@@ -66,12 +66,3 @@ groups:
       annotations:
         summary: MongoDB too many connections (instance {{ $labels.instance }})
         description: "Too many connections (> 80%)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
-
-    - alert: MongodbVirtualMemoryUsage
-      expr: '(sum(mongodb_ss_mem_virtual) BY (instance) / sum(mongodb_ss_mem_resident) BY (instance)) > 3'
-      for: 2m
-      labels:
-        severity: warning
-      annotations:
-        summary: MongoDB virtual memory usage (instance {{ $labels.instance }})
-        description: "High memory usage\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

From 1e4ea0b3e75cdbf59a56c3e1cad94deaf2ab723f Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Thu, 6 Jun 2024 22:53:29 +0200
Subject: [PATCH 25/32] Update rules.yml

---
 _data/rules.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index cd0123e..9a08f71 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2421,7 +2421,7 @@ groups:
             rules:
               - name: Minio cluster disk offline
                 description: "Minio cluster disk is offline"
-                query: "minio_cluster_disk_offline_total > 0"
+                query: "minio_cluster_drive_offline_total > 0"
                 severity: critical
               - name: Minio node disk offline
                 description: "Minio cluster node disk is offline"

From 1ee046b7392426039031b521a433dcc177104134 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Thu, 6 Jun 2024 20:54:49 +0000
Subject: [PATCH 26/32] Publish

---
 dist/rules/minio/embedded-exporter.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dist/rules/minio/embedded-exporter.yml b/dist/rules/minio/embedded-exporter.yml
index 8e19729..1ac2de5 100644
--- a/dist/rules/minio/embedded-exporter.yml
+++ b/dist/rules/minio/embedded-exporter.yml
@@ -5,7 +5,7 @@ groups:
   rules:
 
     - alert: MinioClusterDiskOffline
-      expr: 'minio_cluster_disk_offline_total > 0'
+      expr: 'minio_cluster_drive_offline_total > 0'
       for: 0m
       labels:
         severity: critical

From ca4fb01c6dda3514048fb28166d3bd9f40b06ef7 Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Fri, 14 Jun 2024 20:15:44 +0200
Subject: [PATCH 27/32] Update rules.yml

---
 _data/rules.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 9a08f71..8994d44 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -240,12 +240,15 @@ groups:
                 query: '(rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
                 severity: warning
                 for: 5m
-              - name: Host context switching
-                description: Context switching is growing on the node (> 10000 / CPU / s)
-                query: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+              - name: Host context switching high
+                description: Context switching is growing on the node (twice the daily average during the last 15m)
+                query: |
+                  (rate(node_context_switches_total[15m])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"}))
+                  /
+                  (rate(node_context_switches_total[1d])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) > 2
                 severity: warning
                 comments: |
-                  10000 context switches is an arbitrary number.
+                  x2 context switches is an arbitrary number.
                   The alert threshold depends on the nature of the application.
                   Please read: https://github.com/samber/awesome-prometheus-alerts/issues/58
               - name: Host swap is filling up

From 60c235975c4a34354d30031cef12234d3fe7e3f6 Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Fri, 14 Jun 2024 18:16:53 +0000
Subject: [PATCH 28/32] Publish

---
 dist/rules/host-and-hardware/node-exporter.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/dist/rules/host-and-hardware/node-exporter.yml b/dist/rules/host-and-hardware/node-exporter.yml
index 6655ef7..0d80c16 100644
--- a/dist/rules/host-and-hardware/node-exporter.yml
+++ b/dist/rules/host-and-hardware/node-exporter.yml
@@ -175,14 +175,17 @@ groups:
         summary: Host unusual disk IO (instance {{ $labels.instance }})
         description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
-    - alert: HostContextSwitching
-      expr: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+    - alert: HostContextSwitchingHigh
+      expr: >-
+        (rate(node_context_switches_total[15m])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"}))
+        /
+        (rate(node_context_switches_total[1d])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) > 2
       for: 0m
       labels:
         severity: warning
       annotations:
-        summary: Host context switching (instance {{ $labels.instance }})
-        description: "Context switching is growing on the node (> 10000 / CPU / s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        summary: Host context switching high (instance {{ $labels.instance }})
+        description: "Context switching is growing on the node (twice the daily average during the last 15m)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: HostSwapIsFillingUp
       expr: '((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'

From b6a6c2e31315873c39c28622ffb2f26ad8f5ce9b Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Tue, 2 Jul 2024 09:33:01 +0200
Subject: [PATCH 29/32] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 16c92c7..9188322 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
 - [MongoDB](https://samber.github.io/awesome-prometheus-alerts/rules#mongodb)
 - [RabbitMQ](https://samber.github.io/awesome-prometheus-alerts/rules#rabbitmq)
 - [Elasticsearch](https://samber.github.io/awesome-prometheus-alerts/rules#elasticsearch)
+- [Meilisearch](https://samber.github.io/awesome-prometheus-alerts/rules#meilisearch)
 - [Cassandra](https://samber.github.io/awesome-prometheus-alerts/rules#cassandra)
 - [Clickhouse](https://samber.github.io/awesome-prometheus-alerts/rules#clickhouse)
 - [Zookeeper](https://samber.github.io/awesome-prometheus-alerts/rules#zookeeper)

From 9557d4b50e09f31d2f86ac1c70f87a025a6ac1b6 Mon Sep 17 00:00:00 2001
From: Greg <58505377+nohant@users.noreply.github.com>
Date: Tue, 2 Jul 2024 09:33:08 +0200
Subject: [PATCH 30/32] feat(meilisearch): add basic set of rules (#425)

* feat(meilisearch): add basic meilisearch rules

* fix(query): use == instead of =

* fix(data): set correct name and use ==

* chore(meilisearch): remove index filter
---
 _data/rules.yml                              | 15 +++++++++++++
 dist/rules/meilisearch/embedded-exporter.yml | 23 ++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 dist/rules/meilisearch/embedded-exporter.yml

diff --git a/_data/rules.yml b/_data/rules.yml
index 8994d44..128793f 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -842,6 +842,21 @@ groups:
                 query: "increase(redis_rejected_connections_total[1m]) > 0"
                 severity: critical
 
+      - name: Meilisearch
+        exporters:
+          - name: Embedded exporter
+            slug: embedded-exporter
+            doc_url: https://github.com/orgs/meilisearch/discussions/625
+            rules:                
+              - name: Meilisearch index is empty
+                description: Meilisearch instance is down
+                query: 'meilisearch_index_docs_count == 0'
+                severity: warning
+              - name: Meilisearch http response time
+                description: Meilisearch http response time is too high
+                query: "meilisearch_http_response_time_seconds > 0.5"
+                severity: warning
+
       - name: MongoDB
         exporters:
           - name: percona/mongodb_exporter
diff --git a/dist/rules/meilisearch/embedded-exporter.yml b/dist/rules/meilisearch/embedded-exporter.yml
new file mode 100644
index 0000000..a8824dd
--- /dev/null
+++ b/dist/rules/meilisearch/embedded-exporter.yml
@@ -0,0 +1,23 @@
+groups:
+
+- name: EmbeddedExporter
+
+  rules:
+
+    - alert: MeilisearchIndexIsEmpty
+      expr: meilisearch_index_docs_count == 0
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: the index {{ $labels.Index }} is empty
+        description: "The index {{ $labels.Index }} is empty at the moment, and shouldnt be empty\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        
+    - alert: MeilisearchHttpResponseTimeIsTooHigh
+      expr: rate(meilisearch_http_response_time_seconds_sum[5m]) / rate(meilisearch_http_response_time_seconds_count[5m]) > 0.5
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: the meilisearch server http response time is too high
+        description: "The meilisearch server http response time is too high at the moment\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

From 47e74f65e02fbbb60fddaf450cca3178ef5e4ecd Mon Sep 17 00:00:00 2001
From: Samuel Berthe <dev@samuel-berthe.fr>
Date: Tue, 2 Jul 2024 09:33:51 +0200
Subject: [PATCH 31/32] Update rules.yml

---
 _data/rules.yml | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/_data/rules.yml b/_data/rules.yml
index 128793f..0216beb 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -842,21 +842,6 @@ groups:
                 query: "increase(redis_rejected_connections_total[1m]) > 0"
                 severity: critical
 
-      - name: Meilisearch
-        exporters:
-          - name: Embedded exporter
-            slug: embedded-exporter
-            doc_url: https://github.com/orgs/meilisearch/discussions/625
-            rules:                
-              - name: Meilisearch index is empty
-                description: Meilisearch instance is down
-                query: 'meilisearch_index_docs_count == 0'
-                severity: warning
-              - name: Meilisearch http response time
-                description: Meilisearch http response time is too high
-                query: "meilisearch_http_response_time_seconds > 0.5"
-                severity: warning
-
       - name: MongoDB
         exporters:
           - name: percona/mongodb_exporter
@@ -1162,6 +1147,21 @@ groups:
                 severity: warning
                 for: 5m                 
 
+      - name: Meilisearch
+        exporters:
+          - name: Embedded exporter
+            slug: embedded-exporter
+            doc_url: https://github.com/orgs/meilisearch/discussions/625
+            rules:
+              - name: Meilisearch index is empty
+                description: Meilisearch index has no documents
+                query: 'meilisearch_index_docs_count == 0'
+                severity: warning
+              - name: Meilisearch http response time
+                description: Meilisearch average http response time is too high (> 0.5s over the last 5m)
+                query: 'rate(meilisearch_http_response_time_seconds_sum[5m]) / rate(meilisearch_http_response_time_seconds_count[5m]) > 0.5'
+                severity: warning
+
       - name: Cassandra
         exporters:
           - name: instaclustr/cassandra-exporter

From 58ade95b8bfaf20f875c74b8d7a6c509f70a77ab Mon Sep 17 00:00:00 2001
From: samber <samber@users.noreply.github.com>
Date: Tue, 2 Jul 2024 07:34:59 +0000
Subject: [PATCH 32/32] Publish

---
 dist/rules/meilisearch/embedded-exporter.yml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/dist/rules/meilisearch/embedded-exporter.yml b/dist/rules/meilisearch/embedded-exporter.yml
index a8824dd..8da2803 100644
--- a/dist/rules/meilisearch/embedded-exporter.yml
+++ b/dist/rules/meilisearch/embedded-exporter.yml
@@ -5,19 +5,19 @@ groups:
   rules:
 
     - alert: MeilisearchIndexIsEmpty
-      expr: meilisearch_index_docs_count == 0
-      for: 5m
+      expr: 'meilisearch_index_docs_count == 0'
+      for: 0m
       labels:
         severity: warning
       annotations:
-        summary: the index {{ $labels.Index }} is empty
-        description: "The index {{ $labels.Index }} is empty at the moment, and shouldnt be empty\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
-        
-    - alert: MeilisearchHttpResponseTimeIsTooHigh
-      expr: rate(meilisearch_http_response_time_seconds_sum[5m]) / rate(meilisearch_http_response_time_seconds_count[5m]) > 0.5
-      for: 5m
+        summary: Meilisearch index is empty (instance {{ $labels.instance }})
+        description: "Meilisearch index has no documents\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: MeilisearchHttpResponseTime
+      expr: 'rate(meilisearch_http_response_time_seconds_sum[5m]) / rate(meilisearch_http_response_time_seconds_count[5m]) > 0.5'
+      for: 0m
       labels:
         severity: warning
       annotations:
-        summary: the meilisearch server http response time is too high
-        description: "The meilisearch server http response time is too high at the moment\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        summary: Meilisearch http response time (instance {{ $labels.instance }})
+        description: "Meilisearch average http response time is too high (> 0.5s over the last 5m)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"