From 3e6714ea9053c98195613d0188aed62e6f3c430c Mon Sep 17 00:00:00 2001
From: Georg Pfuetzenreuter
Date: Mar 24 2024 22:19:09 +0000
Subject: Configure Postfix queue alerts


This adds alerting for unusual Postfix queue sizes.

Signed-off-by: Georg Pfuetzenreuter
---

diff --git a/salt/files/prometheus/alerts/mail.yml b/salt/files/prometheus/alerts/mail.yml
new file mode 100644
index 0000000..c058e3a
--- /dev/null
+++ b/salt/files/prometheus/alerts/mail.yml
@@ -0,0 +1,78 @@
+# yamllint disable rule:line-length
+#####################################################
+## MANAGED BY SALT in salt/files/prometheus/alerts ##
+#####################################################
+---
+# Prometheus alerting rules for Postfix mail queue sizes, keyed on the
+# postfix_queue_length metric and its "queue" and "instance" labels.
+groups:
+  - name: ioo-mail
+    rules:
+      # Fires when any queue other than "deferred" and "flush" (both have
+      # dedicated rules below) stays non-empty for 30 seconds.
+      - alert: Postfix queue
+        expr: >-
+          postfix_queue_length{queue!~"deferred|flush"} > 0
+        for: 30s
+        labels:
+          severity: warning
+        annotations:
+          title: >-
+            Large mail queue on {{ $labels.instance }}
+          description: |
+            The Postfix "{{ $labels.queue }}" queue on {{ $labels.instance }} exceeds 0 messages for over thirty seconds.
+            VALUE = {{ $value }}
+            LABELS = {{ $labels }}
+
+      # Fires when the "flush" queue holds more than 1 message for 30 seconds.
+      - alert: Postfix flush queue
+        expr: >-
+          postfix_queue_length{queue="flush"} > 1
+        for: 30s
+        labels:
+          severity: warning
+        annotations:
+          title: >-
+            Large flush mail queue on {{ $labels.instance }}
+          description: |
+            The Postfix "flush" queue on {{ $labels.instance }} exceeds 1 message for over thirty seconds.
+            VALUE = {{ $value }}
+            LABELS = {{ $labels }}
+
+      # Fires when the "deferred" queue holds more than 1 message for 30
+      # seconds on any instance except mx1-mx4, which are covered by
+      # "Large Postfix deferred queue" below with a higher threshold.
+      # The dots in the host name regex are escaped ("\\." inside the
+      # double-quoted PromQL string) so they only match a literal ".".
+      # NOTE(review): PromQL regex matchers are fully anchored, so this assumes
+      # the "instance" label is the bare host name without a port - confirm.
+      - alert: Postfix deferred queue
+        expr: >-
+          postfix_queue_length{instance!~"mx[1-4]\\.infra\\.opensuse\\.org", queue="deferred"} > 1
+        for: 30s
+        labels:
+          severity: warning
+        annotations:
+          title: >-
+            Large deferred mail queue on {{ $labels.instance }}
+          description: |
+            The Postfix "deferred" queue on {{ $labels.instance }} exceeds 1 held message for over thirty seconds.
+            VALUE = {{ $value }}
+            LABELS = {{ $labels }}
+
+      # Fires with critical severity when the "deferred" queue on mx1-mx4
+      # holds more than 400 messages for 30 seconds. The regex is the escaped
+      # counterpart of the exclusion in "Postfix deferred queue" above.
+      - alert: Large Postfix deferred queue
+        expr: >-
+          postfix_queue_length{instance=~"mx[1-4]\\.infra\\.opensuse\\.org", queue="deferred"} > 400
+        for: 30s
+        labels:
+          severity: critical
+        annotations:
+          title: >-
+            Large deferred mail queue on {{ $labels.instance }}
+          description: |
+            The Postfix "deferred" queue on {{ $labels.instance }} exceeds 400 held messages for over thirty seconds.
+            VALUE = {{ $value }}
+            LABELS = {{ $labels }}