64 lines
2.4 KiB
YAML
64 lines
2.4 KiB
YAML
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
#
# Alerting rules built on the smartctl_device_* disk-health metrics.
# Every rule must hold for 15 minutes before firing and is labelled
# severity=critical.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: smartctl-exporter-rules
spec:
  groups:
    - name: smartctl-exporter.rules
      rules:
        # Fires when the drive's current temperature reading exceeds 65.
        # The metric is filtered to temperature_type="current" so that
        # limit/threshold series published under the same metric name
        # (e.g. the drive trip point) cannot trigger the alert on their own.
        - alert: SmartDeviceHighTemperature
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              has a temperature higher than 65°C.
          expr: smartctl_device_temperature{temperature_type="current"} > 65
          for: 15m
          labels:
            severity: critical

        # Fires when either status metric reports a value other than 1.
        # NOTE(review): presumably 1 means "passed/OK" for both metrics;
        # confirm against the exporter version in use.
        - alert: SmartDeviceTestFailed
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              did not pass its SMART test.
          expr: |
            (
              smartctl_device_smart_status != 1
              or
              smartctl_device_status != 1
            )
          for: 15m
          labels:
            severity: critical

        # Fires on any non-zero critical-warning value.
        - alert: SmartDeviceCriticalWarning
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              is in a critical state.
          expr: smartctl_device_critical_warning != 0
          for: 15m
          labels:
            severity: critical

        # Fires on a non-zero media error count, skipping devices whose name
        # starts with "nvme". Prometheus regex matchers are fully anchored,
        # so the leading "^" is redundant but harmless.
        - alert: SmartDeviceMediaErrors
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              has media errors.
          expr: smartctl_device_media_errors{device!~"^nvme.+"} != 0
          for: 15m
          labels:
            severity: critical

        # Fires when the available spare drops below the reported threshold.
        # The alert name keeps its historical spelling ("Threadhold") because
        # routes and silences may match on it; rename only together with them.
        - alert: SmartDeviceAvailableSpareUnderThreadhold
          annotations:
            summary: >-
              Device {{ $labels.device }} on instance {{ $labels.instance }}
              is under available spare threshold.
          expr: smartctl_device_available_spare_threshold > smartctl_device_available_spare
          for: 15m
          labels:
            severity: critical

        # Fires when the current link speed differs from the maximum speed.
        # on(...) limits vector matching to the listed labels, so the differing
        # speed_type label does not prevent the two series from pairing up.
        - alert: SmartDeviceInterfaceSlow
          annotations:
            summary: >-
              Device {{ $labels.device }} on instance {{ $labels.instance }}
              interface is slower than it should be.
          expr: |
            smartctl_device_interface_speed{speed_type="current"}
              != on(device, instance, namespace, pod)
            smartctl_device_interface_speed{speed_type="max"}
          for: 15m
          labels:
            severity: critical