# yamllint disable rule:line-length
#####################################################
## MANAGED BY SALT in salt/files/prometheus/alerts ##
#####################################################
---
# Top-level list of Prometheus alerting rule groups.
groups:
# Rule group containing the Postfix queue-length alerts defined below.
- name: ioo-mail
rules:
# Catch-all: any Postfix queue other than active/deferred/flush/incoming,
# which are covered by more specific rules in this file.
- alert: Postfix queue
  expr: 'postfix_queue_length{queue!~"active|deferred|flush|incoming"} > 0'
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: 'Large mail queue on {{ $labels.instance }}'
    description: |
      The Postfix "{{ $labels.queue }}" queue on {{ $labels.instance }} exceeds 0 messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Flush queue on all machines. A single entry is normal because flush(8) does
# not truncate unless woken up, hence the threshold of 1 instead of 0.
- alert: Postfix flush queue
  expr: 'postfix_queue_length{queue="flush"} > 1'
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: 'Large flush mail queue on {{ $labels.instance }}'
    description: |
      The Postfix "flush" queue on {{ $labels.instance }} exceeds 1 message for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Active queue for all machines except mailman3 and mx1-mx4.
# The dots in the hostname pattern are escaped (`\\.` inside the PromQL string
# yields the regex `\.`) so they only match a literal dot.
- alert: Postfix active queue
  expr: >-
    postfix_queue_length{instance!~"^(?:mailman3|mx[1-4])\\.infra\\.opensuse\\.org$", queue="active"} > 0
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: >-
      Large active mail queue on {{ $labels.instance }}
    description: |
      The Postfix "active" queue on {{ $labels.instance }} exceeds 0 messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Active queue on mx1-mx4 only, with a threshold of 70 messages.
# The dots in the hostname pattern are escaped (`\\.` inside the PromQL string
# yields the regex `\.`) so they only match a literal dot.
- alert: Large Postfix active queue (MX)
  expr: >-
    postfix_queue_length{instance=~"^mx[1-4]\\.infra\\.opensuse\\.org$", queue="active"} > 70
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: >-
      Large active mail queue on {{ $labels.instance }}
    description: |
      The Postfix "active" queue on {{ $labels.instance }} exceeds 70 messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Active queue on mailman3 only, with a threshold of 30 messages.
# A single fixed hostname is selected with an exact-equality matcher; no
# regular expression is needed.
- alert: Large Postfix active queue (Mailman)
  expr: >-
    postfix_queue_length{instance="mailman3.infra.opensuse.org", queue="active"} > 30
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: >-
      Large active mail queue on {{ $labels.instance }}
    description: |
      The Postfix "active" queue on {{ $labels.instance }} exceeds 30 messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Deferred queue for all machines except mx1-mx4.
# The dots in the hostname pattern are escaped (`\\.` inside the PromQL string
# yields the regex `\.`) so they only match a literal dot.
- alert: Postfix deferred queue
  expr: >-
    postfix_queue_length{instance!~"^mx[1-4]\\.infra\\.opensuse\\.org$", queue="deferred"} > 1
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: >-
      Large deferred mail queue on {{ $labels.instance }}
    description: |
      The Postfix "deferred" queue on {{ $labels.instance }} exceeds 1 deferred message for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Deferred queue on mx1-mx4 only, with a threshold of 400 messages.
# This is the only rule in this group raised with severity "critical".
# The dots in the hostname pattern are escaped (`\\.` inside the PromQL string
# yields the regex `\.`) so they only match a literal dot.
- alert: Large Postfix deferred queue
  expr: >-
    postfix_queue_length{instance=~"^mx[1-4]\\.infra\\.opensuse\\.org$", queue="deferred"} > 400
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: critical
  annotations:
    title: >-
      Large deferred mail queue on {{ $labels.instance }}
    description: |
      The Postfix "deferred" queue on {{ $labels.instance }} exceeds 400 deferred messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Incoming queue for all machines except mx1-mx4.
# The dots in the hostname pattern are escaped (`\\.` inside the PromQL string
# yields the regex `\.`) so they only match a literal dot.
- alert: Postfix incoming queue
  expr: >-
    postfix_queue_length{instance!~"^mx[1-4]\\.infra\\.opensuse\\.org$", queue="incoming"} > 0
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: >-
      Large incoming mail queue on {{ $labels.instance }}
    description: |
      The Postfix "incoming" queue on {{ $labels.instance }} exceeds 0 messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
# Incoming queue on mx1-mx4 only, with a threshold of 5 messages.
# The dots in the hostname pattern are escaped (`\\.` inside the PromQL string
# yields the regex `\.`) so they only match a literal dot.
- alert: Large Postfix incoming queue (MX)
  expr: >-
    postfix_queue_length{instance=~"^mx[1-4]\\.infra\\.opensuse\\.org$", queue="incoming"} > 5
  # The condition must hold for 310 seconds before the alert fires.
  for: 310s
  labels:
    severity: warning
  annotations:
    title: >-
      Large incoming mail queue on {{ $labels.instance }}
    description: |
      The Postfix "incoming" queue on {{ $labels.instance }} exceeds 5 messages for over five minutes.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}