Add Loki alerting rules.

This commit is contained in:
Joseph Hanson 2024-02-29 11:11:13 -06:00
parent 2087fa8688
commit 19c57a2cc4

View file

@@ -0,0 +1,14 @@
---
# Loki ruler alerting rules for SMART disk-health messages.
groups:
  - name: smart
    rules:
      # Raises a critical alert per `hostname` when, within the last
      # 2 minutes, at least one log line whose parsed `_SYSTEMD_UNIT` field is
      # "smartmontools.service" matches (case-insensitively) "error" or
      # "fail", ignoring lines that contain "previous self-test completed
      # without error" or "Prefailure".
      - alert: SMARTFailure
        # The line filters run before `| json` so non-matching lines are
        # dropped before the JSON parse; they test the raw log line either
        # way, so the selected set of lines is the same as with the filters
        # placed after the parser.
        expr: |
          sum by (hostname) (
            count_over_time(
              {hostname=~".+"}
                !~ "(?i)previous self-test completed without error"
                !~ "(?i)Prefailure"
                |~ "(?i)(error|fail)"
                | json
                | _SYSTEMD_UNIT = "smartmontools.service"
              [2m]
            )
          ) > 0
        # Fire on the first evaluation that sees a match. With a hold time
        # equal to the 2m range above, an isolated log line leaves the range
        # before the hold time elapses, so the alert would go pending and then
        # reset without ever firing.
        for: 0m
        labels:
          severity: critical
          category: logs
        annotations:
          hostname: "{{ $labels.hostname }}"
          summary: "{{ $labels.hostname }} has reported SMART failures"