---
# Alerting rules evaluated against log streams (LogQL expression inside a
# Prometheus-style rule group, i.e. the format consumed by the Loki ruler).
groups:
  - name: smart
    rules:
      # Raises a critical alert, per hostname, when log lines belonging to
      # smartmontools.service mention an error or a failure.
      - alert: SMARTFailure
        # Query layout:
        #   1. Stream selector: every stream carrying a non-empty `hostname`.
        #   2. Line filters, placed directly after the selector so that
        #      non-matching lines are dropped before any JSON parsing happens.
        #      They operate on the raw log line, so their position relative
        #      to `| json` does not change which lines are selected:
        #        - drop "previous self-test completed without error", which
        #          would otherwise match the "error" keyword;
        #        - drop "Prefailure" lines, which would otherwise match the
        #          "fail" keyword;
        #        - keep only lines containing "error" or "fail"
        #          (case-insensitive).
        #   3. `| json` extracts fields as labels, then only entries whose
        #      `_SYSTEMD_UNIT` field is "smartmontools.service" are kept.
        #   4. Matching lines are counted over a 2m window and summed per
        #      hostname; any count above zero satisfies the condition.
        #
        # NOTE(review): the range window (2m) equals the `for` duration (2m).
        # A single burst of matching lines may slide out of the window before
        # the hold period elapses, so one-off reports might never fire.
        # Confirm this debounce is intended given the ruler's evaluation
        # interval.
        expr: |-
          sum by (hostname) (
            count_over_time(
              {hostname=~".+"}
                !~ "(?i)previous self-test completed without error"
                !~ "(?i)Prefailure"
                |~ "(?i)(error|fail)"
                | json
                | _SYSTEMD_UNIT = "smartmontools.service" [2m]
            )
          ) > 0
        # The condition must hold continuously for this long before firing.
        for: 2m
        labels:
          severity: critical
          category: logs
        annotations:
          # Templates are quoted so YAML does not read `{{` as a flow mapping.
          hostname: "{{ $labels.hostname }}"
          summary: "{{ $labels.hostname }} has reported SMART failures"