# Prometheus alerting rules for Oracle Database metrics carrying the
# `oracledb_` prefix (presumably exposed by iamseth/oracledb_exporter, going
# by the group name -- confirm against your scrape configuration).
#
# Conventions shared by every rule in this group:
#   * `for:` is how long the expression must stay true before the alert fires.
#   * `labels.severity` is either `warning` or `critical`.
#   * Each description ends with the raw sample value and the full label set.
groups:
  - name: IamsethOracledbExporter
    rules:
      # 1m delay allows a restart without triggering an alert.
      - alert: OracleDbDown
        expr: 'oracledb_up == 0'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'Oracle DB down (instance {{ $labels.instance }})'
          description: "Oracle Database instance is down on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is workload-dependent. Adjust 85% to suit your environment.
      # The trailing `and ... > 0` clause drops series whose reported limit is
      # not positive, so the division cannot raise an alert for them.
      - alert: 'OracleDbSessionsReachingLimit(>85%)'
        expr: 'oracledb_resource_current_utilization{resource_name="sessions"} / oracledb_resource_limit_value{resource_name="sessions"} * 100 > 85 and oracledb_resource_limit_value{resource_name="sessions"} > 0'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Oracle DB sessions reaching limit (> 85%) (instance {{ $labels.instance }})'
          description: "Oracle Database session utilization is above 85% on {{ $labels.instance }} (current value: {{ $value }}%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is workload-dependent. Adjust 85% to suit your environment.
      # Same shape as the sessions rule, applied to resource_name="processes".
      - alert: 'OracleDbProcessesReachingLimit(>85%)'
        expr: 'oracledb_resource_current_utilization{resource_name="processes"} / oracledb_resource_limit_value{resource_name="processes"} * 100 > 85 and oracledb_resource_limit_value{resource_name="processes"} > 0'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Oracle DB processes reaching limit (> 85%) (instance {{ $labels.instance }})'
          description: "Oracle Database process utilization is above 85% on {{ $labels.instance }} (current value: {{ $value }}%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Warning tier for tablespace usage. This expression is still true above
      # 95%, so it fires together with OracleDbTablespaceFull unless the
      # warning is suppressed downstream (e.g. an Alertmanager inhibit rule).
      - alert: 'OracleDbTablespaceReachingCapacity(>85%)'
        expr: 'oracledb_tablespace_used_percent > 85'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Oracle DB tablespace reaching capacity (> 85%) (instance {{ $labels.instance }})'
          description: "Oracle Database tablespace {{ $labels.tablespace }} is above 85% usage on {{ $labels.instance }} (current value: {{ $value }}%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Critical tier for the same tablespace usage metric.
      - alert: 'OracleDbTablespaceFull(>95%)'
        expr: 'oracledb_tablespace_used_percent > 95'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: 'Oracle DB tablespace full (> 95%) (instance {{ $labels.instance }})'
          description: "Oracle Database tablespace {{ $labels.tablespace }} is critically full on {{ $labels.instance }} (current value: {{ $value }}%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # A high rollback rate (>20%) often indicates application-level issues
      # such as deadlocks, constraint violations, or poorly designed
      # transactions. The value is rollbacks as a percentage of
      # commits + rollbacks over 5m; the trailing `and ... > 0` clause requires
      # some transaction activity so an idle instance cannot alert on 0/0.
      - alert: OracleDbHighUserRollbacks
        expr: 'rate(oracledb_activity_user_rollbacks[5m]) / (rate(oracledb_activity_user_commits[5m]) + rate(oracledb_activity_user_rollbacks[5m])) * 100 > 20 and (rate(oracledb_activity_user_commits[5m]) + rate(oracledb_activity_user_rollbacks[5m])) > 0'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Oracle DB high user rollbacks (instance {{ $labels.instance }})'
          description: "Oracle Database on {{ $labels.instance }} has a high rollback rate ({{ $value }}% of transactions are rolled back)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Threshold is highly workload-dependent. Adjust 200 to suit your
      # environment.
      - alert: OracleDbTooManyActiveSessions
        expr: 'oracledb_sessions_value{status="ACTIVE", type="USER"} > 200'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Oracle DB too many active sessions (instance {{ $labels.instance }})'
          description: "Oracle Database on {{ $labels.instance }} has too many active user sessions (current value: {{ $value }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # The metric from v$waitclassmetric is already a normalized rate
      # (centiseconds per second), so no rate() is applied here. Threshold 300
      # means 3 seconds of I/O wait per second of wall time.
      - alert: 'OracleDbHighWaitTime(userI/o)'
        expr: 'oracledb_wait_time_user_io > 300'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Oracle DB high wait time (user I/O) (instance {{ $labels.instance }})'
          description: "Oracle Database on {{ $labels.instance }} is experiencing high user I/O wait time\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"