# awesome-prometheus-alerts/dist/rules/elasticsearch/prometheus-community-elasticsearch-exporter.yml

groups:

- name: PrometheusCommunityElasticsearchExporter

  rules:

    # Critical: heap bytes in use have stayed above 90% of the heap maximum
    # for 2 minutes.
    - alert: ElasticsearchHeapUsageTooHigh
      expr: >-
        (elasticsearch_jvm_memory_used_bytes{area="heap"}
        / elasticsearch_jvm_memory_max_bytes{area="heap"})
        * 100 > 90
      for: 2m
      labels:
        severity: critical
      annotations:
        summary: "Elasticsearch Heap Usage Too High (instance {{ $labels.instance }})"
        description: |-
          The heap usage is over 90%
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: heap bytes in use have stayed above 80% of the heap maximum
    # for 2 minutes. This rule has no upper bound, so it keeps firing while
    # the 90% critical rule is active as well.
    - alert: ElasticsearchHeapUsageWarning
      expr: >-
        (elasticsearch_jvm_memory_used_bytes{area="heap"}
        / elasticsearch_jvm_memory_max_bytes{area="heap"})
        * 100 > 80
      for: 2m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch Heap Usage warning (instance {{ $labels.instance }})"
        description: |-
          The heap usage is over 80%
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Critical: less than 10% of the data filesystem is still available
    # (i.e. usage is above 90%). No hold time: fires as soon as the
    # condition is observed.
    - alert: ElasticsearchDiskOutOfSpace
      expr: >-
        elasticsearch_filesystem_data_available_bytes
        / elasticsearch_filesystem_data_size_bytes
        * 100 < 10
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "Elasticsearch disk out of space (instance {{ $labels.instance }})"
        description: |-
          The disk usage is over 90%
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: less than 20% of the data filesystem has been available
    # (i.e. usage above 80%) for 2 minutes.
    - alert: ElasticsearchDiskSpaceLow
      expr: >-
        elasticsearch_filesystem_data_available_bytes
        / elasticsearch_filesystem_data_size_bytes
        * 100 < 20
      for: 2m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch disk space low (instance {{ $labels.instance }})"
        description: |-
          The disk usage is over 80%
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Critical: the cluster health series labelled color="red" has value 1.
    # No hold time: fires as soon as the condition is observed.
    - alert: ElasticsearchClusterRed
      expr: >-
        elasticsearch_cluster_health_status{color="red"} == 1
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "Elasticsearch Cluster Red (instance {{ $labels.instance }})"
        description: |-
          Elastic Cluster Red status
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: the cluster health series labelled color="yellow" has value 1.
    # No hold time: fires as soon as the condition is observed.
    - alert: ElasticsearchClusterYellow
      expr: >-
        elasticsearch_cluster_health_status{color="yellow"} == 1
      for: 0m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch Cluster Yellow (instance {{ $labels.instance }})"
        description: |-
          Elastic Cluster Yellow status
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Critical: the reported node count has dropped below 3.
    # The threshold of 3 is hard-coded here; adjust it to the expected
    # cluster size.
    - alert: ElasticsearchHealthyNodes
      expr: "elasticsearch_cluster_health_number_of_nodes < 3"
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "Elasticsearch Healthy Nodes (instance {{ $labels.instance }})"
        description: |-
          Missing node in Elasticsearch cluster
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Critical: the reported data-node count has dropped below 3.
    # The threshold of 3 is hard-coded here; adjust it to the expected
    # number of data nodes.
    - alert: ElasticsearchHealthyDataNodes
      expr: "elasticsearch_cluster_health_number_of_data_nodes < 3"
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "Elasticsearch Healthy Data Nodes (instance {{ $labels.instance }})"
        description: |-
          Missing data node in Elasticsearch cluster
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Info: at least one shard is currently relocating. No hold time.
    - alert: ElasticsearchRelocatingShards
      expr: "elasticsearch_cluster_health_relocating_shards > 0"
      for: 0m
      labels:
        severity: info
      annotations:
        summary: "Elasticsearch relocating shards (instance {{ $labels.instance }})"
        description: |-
          Elasticsearch is relocating shards
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: same condition as ElasticsearchRelocatingShards, but it must
    # hold continuously for 15 minutes before the alert fires.
    - alert: ElasticsearchRelocatingShardsTooLong
      expr: "elasticsearch_cluster_health_relocating_shards > 0"
      for: 15m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch relocating shards too long (instance {{ $labels.instance }})"
        description: |-
          Elasticsearch has been relocating shards for 15min
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Info: at least one shard is currently initializing. No hold time.
    - alert: ElasticsearchInitializingShards
      expr: "elasticsearch_cluster_health_initializing_shards > 0"
      for: 0m
      labels:
        severity: info
      annotations:
        summary: "Elasticsearch initializing shards (instance {{ $labels.instance }})"
        description: |-
          Elasticsearch is initializing shards
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: same condition as ElasticsearchInitializingShards, but it must
    # hold continuously for 15 minutes before the alert fires.
    - alert: ElasticsearchInitializingShardsTooLong
      expr: "elasticsearch_cluster_health_initializing_shards > 0"
      for: 15m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch initializing shards too long (instance {{ $labels.instance }})"
        description: |-
          Elasticsearch has been initializing shards for 15 min
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Critical: at least one shard is unassigned. No hold time: fires as
    # soon as the condition is observed.
    - alert: ElasticsearchUnassignedShards
      expr: "elasticsearch_cluster_health_unassigned_shards > 0"
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "Elasticsearch unassigned shards (instance {{ $labels.instance }})"
        description: |-
          Elasticsearch has unassigned shards
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: the pending-task count has been non-zero for 15 minutes
    # without interruption.
    - alert: ElasticsearchPendingTasks
      expr: "elasticsearch_cluster_health_number_of_pending_tasks > 0"
      for: 15m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch pending tasks (instance {{ $labels.instance }})"
        description: |-
          Elasticsearch has pending tasks. Cluster works slowly.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: the indexing counter grew by less than 1 over the last
    # 10 minutes. Only series labelled es_data_node="true" are considered.
    # The 10-minute window lives in the expression itself, so no extra hold
    # time is applied.
    - alert: ElasticsearchNoNewDocuments
      expr: >-
        increase(elasticsearch_indices_indexing_index_total{es_data_node="true"}[10m]) < 1
      for: 0m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch no new documents (instance {{ $labels.instance }})"
        description: |-
          No new documents for 10 min!
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: indexing time accumulated over the last minute, divided by
    # the number of index operations in that same minute, has exceeded
    # 0.0005 (seconds per operation) for 10 minutes.
    - alert: ElasticsearchHighIndexingLatency
      expr: >-
        increase(elasticsearch_indices_indexing_index_time_seconds_total[1m])
        / increase(elasticsearch_indices_indexing_index_total[1m]) > 0.0005
      for: 10m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch High Indexing Latency (instance {{ $labels.instance }})"
        description: |-
          The indexing latency on Elasticsearch cluster is higher than the threshold.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

    # Warning: the per-second indexing rate, summed over every series the
    # selector matches, has stayed above 10000 for 5 minutes.
    # sum() without a by-clause drops all labels from the result, so
    # {{ $labels.instance }} would always render as an empty string; the
    # summary therefore does not reference it.
    - alert: ElasticsearchHighIndexingRate
      expr: 'sum(rate(elasticsearch_indices_indexing_index_total[1m])) > 10000'
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: Elasticsearch High Indexing Rate
        description: "The indexing rate on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    # Warning: the per-second search query rate, summed over every series
    # the selector matches, has stayed above 100 for 5 minutes.
    # sum() without a by-clause drops all labels from the result, so
    # {{ $labels.instance }} would always render as an empty string; the
    # summary therefore does not reference it.
    - alert: ElasticsearchHighQueryRate
      expr: 'sum(rate(elasticsearch_indices_search_query_total[1m])) > 100'
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: Elasticsearch High Query Rate
        description: "The query rate on Elasticsearch cluster is higher than the threshold.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

    # Warning: search fetch time accumulated over the last minute, divided
    # by the number of fetch operations in that same minute, has exceeded
    # 1 (second per operation) for 5 minutes.
    # NOTE(review): the expression uses the *fetch* metrics while the alert
    # name and description say "query" -- confirm whether the search query
    # time/count metrics were intended instead.
    - alert: ElasticsearchHighQueryLatency
      expr: >-
        increase(elasticsearch_indices_search_fetch_time_seconds[1m])
        / increase(elasticsearch_indices_search_fetch_total[1m]) > 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Elasticsearch High Query Latency (instance {{ $labels.instance }})"
        description: |-
          The query latency on Elasticsearch cluster is higher than the threshold.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}