Blob Blame History Raw
# yamllint disable rule:line-length
#####################################################
## MANAGED BY SALT in salt/files/prometheus/alerts ##
#####################################################
---
# Rule group for Postfix mail queue alerts.
groups:
  - name: ioo-mail
    rules:

      # Catch-all: fires for any queue other than active, deferred, flush and
      # incoming, which are handled by their own rules in this group.
      - alert: Postfix queue
        expr: 'postfix_queue_length{queue!~"active|deferred|flush|incoming"} > 0'
        for: 310s
        labels:
          severity: warning
        annotations:
          title: 'Large mail queue on {{ $labels.instance }}'
          description: |
            The Postfix "{{ $labels.queue }}" queue on {{ $labels.instance }} exceeds 0 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Flush queue, all machines. A single entry is expected (flush(8) does not
      # truncate unless woken up), hence the threshold of 1 instead of 0.
      - alert: Postfix flush queue
        expr: 'postfix_queue_length{queue="flush"} > 1'
        for: 310s
        labels:
          severity: warning
        annotations:
          title: 'Large flush mail queue on {{ $labels.instance }}'
          description: |
            The Postfix "flush" queue on {{ $labels.instance }} exceeds 1 message for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Active queue for all machines except mailman3 and mx1-mx4, which are
      # covered by dedicated rules with higher thresholds.
      # Prometheus regex matchers are fully anchored, so ^/$ are not needed.
      # Dots are escaped (\\. inside a double-quoted PromQL string) so they only
      # match a literal "." instead of any character.
      - alert: Postfix active queue
        expr: >-
          postfix_queue_length{instance!~"(?:mailman3|mx[1-4])\\.infra\\.opensuse\\.org", queue="active"} > 0
        for: 310s
        labels:
          severity: warning
        annotations:
          title: >-
            Large active mail queue on {{ $labels.instance }}
          description: |
            The Postfix "active" queue on {{ $labels.instance }} exceeds 0 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Active queue specific to mx1-mx4 (threshold: 70 messages).
      # Prometheus regex matchers are fully anchored, so ^/$ are not needed.
      # Dots are escaped (\\. inside a double-quoted PromQL string) so they only
      # match a literal "." instead of any character.
      - alert: Large Postfix active queue (MX)
        expr: >-
          postfix_queue_length{instance=~"mx[1-4]\\.infra\\.opensuse\\.org", queue="active"} > 70
        for: 310s
        labels:
          severity: warning
        annotations:
          title: >-
            Large active mail queue on {{ $labels.instance }}
          description: |
            The Postfix "active" queue on {{ $labels.instance }} exceeds 70 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Active queue specific to mailman3 (threshold: 30 messages).
      # A single literal host needs no regex: the equality matcher avoids the
      # unescaped "." wildcards of the former pattern.
      - alert: Large Postfix active queue (Mailman)
        expr: >-
          postfix_queue_length{instance="mailman3.infra.opensuse.org", queue="active"} > 30
        for: 310s
        labels:
          severity: warning
        annotations:
          title: >-
            Large active mail queue on {{ $labels.instance }}
          description: |
            The Postfix "active" queue on {{ $labels.instance }} exceeds 30 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Deferred queue for all machines except mx1-mx4, which are covered by a
      # dedicated rule with a higher threshold.
      # Prometheus regex matchers are fully anchored, so ^/$ are not needed.
      # Dots are escaped (\\. inside a double-quoted PromQL string) so they only
      # match a literal "." instead of any character.
      # The description says "message" rather than "held message" to avoid
      # confusion with Postfix's separate hold queue.
      - alert: Postfix deferred queue
        expr: >-
          postfix_queue_length{instance!~"mx[1-4]\\.infra\\.opensuse\\.org", queue="deferred"} > 1
        for: 310s
        labels:
          severity: warning
        annotations:
          title: >-
            Large deferred mail queue on {{ $labels.instance }}
          description: |
            The Postfix "deferred" queue on {{ $labels.instance }} exceeds 1 message for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Deferred queue specific to mx1-mx4 (threshold: 400 messages, critical).
      # Prometheus regex matchers are fully anchored, so ^/$ are not needed.
      # Dots are escaped (\\. inside a double-quoted PromQL string) so they only
      # match a literal "." instead of any character.
      # The description says "messages" rather than "held messages" to avoid
      # confusion with Postfix's separate hold queue.
      - alert: Large Postfix deferred queue
        expr: >-
          postfix_queue_length{instance=~"mx[1-4]\\.infra\\.opensuse\\.org", queue="deferred"} > 400
        for: 310s
        labels:
          severity: critical
        annotations:
          title: >-
            Large deferred mail queue on {{ $labels.instance }}
          description: |
            The Postfix "deferred" queue on {{ $labels.instance }} exceeds 400 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Incoming queue for all machines except mx1-mx4, which are covered by a
      # dedicated rule with a higher threshold.
      # Prometheus regex matchers are fully anchored, so ^/$ are not needed.
      # Dots are escaped (\\. inside a double-quoted PromQL string) so they only
      # match a literal "." instead of any character.
      - alert: Postfix incoming queue
        expr: >-
          postfix_queue_length{instance!~"mx[1-4]\\.infra\\.opensuse\\.org", queue="incoming"} > 0
        for: 310s
        labels:
          severity: warning
        annotations:
          title: >-
            Large incoming mail queue on {{ $labels.instance }}
          description: |
            The Postfix "incoming" queue on {{ $labels.instance }} exceeds 0 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}

      # Incoming queue specific to mx1-mx4 (threshold: 5 messages).
      # Prometheus regex matchers are fully anchored, so ^/$ are not needed.
      # Dots are escaped (\\. inside a double-quoted PromQL string) so they only
      # match a literal "." instead of any character.
      - alert: Large Postfix incoming queue (MX)
        expr: >-
          postfix_queue_length{instance=~"mx[1-4]\\.infra\\.opensuse\\.org", queue="incoming"} > 5
        for: 310s
        labels:
          severity: warning
        annotations:
          title: >-
            Large incoming mail queue on {{ $labels.instance }}
          description: |
            The Postfix "incoming" queue on {{ $labels.instance }} exceeds 5 messages for over five minutes.
            VALUE = {{ $value }}
            LABELS = {{ $labels }}