add smartctl exporter
This commit is contained in:
parent
afa66a4873
commit
68344219da
7 changed files with 7545 additions and 0 deletions
|
@ -11,6 +11,7 @@ resources:
|
||||||
- ./grafana/ks.yaml
|
- ./grafana/ks.yaml
|
||||||
- ./node-exporter/ks.yaml
|
- ./node-exporter/ks.yaml
|
||||||
- ./prometheus-operator-crds/ks.yaml
|
- ./prometheus-operator-crds/ks.yaml
|
||||||
|
- ./smartctl-exporter/ks.yaml
|
||||||
- ./unpoller/ks.yaml
|
- ./unpoller/ks.yaml
|
||||||
- ./vector-agent/ks.yaml
|
- ./vector-agent/ks.yaml
|
||||||
- ./vector-aggregator/ks.yaml
|
- ./vector-aggregator/ks.yaml
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
# Flux HelmRelease that installs the prometheus-smartctl-exporter chart from
# the prometheus-community HelmRepository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  # Anchored so the release name is reused verbatim for fullnameOverride.
  name: &app smartctl-exporter
spec:
  interval: 30m
  chart:
    spec:
      chart: prometheus-smartctl-exporter
      version: 0.12.0
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      strategy: rollback
      retries: 3
  values:
    fullnameOverride: *app
    serviceMonitor:
      enabled: true
    # Chart-bundled rules are disabled here.
    # NOTE(review): presumably because alerts come from the separate
    # prometheusrule.yaml listed in this app's kustomization — confirm.
    prometheusRules:
      enabled: false
|
|
@ -0,0 +1,19 @@
|
||||||
|
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Bundles the HelmRelease and PrometheusRule, and generates a ConfigMap that
# carries the two dashboard JSON files.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./helmrelease.yaml
  - ./prometheusrule.yaml
configMapGenerator:
  - name: smartctl-exporter-dashboard
    files:
      - ./resources/blesswinsamuel_smartctl.json
      - ./resources/smartctl_exporter.json
    options:
      annotations:
        # NOTE(review): presumably stops Flux post-build substitution from
        # rewriting ${...} placeholders inside the dashboard JSON — confirm.
        kustomize.toolkit.fluxcd.io/substitute: disabled
      labels:
        # Quoted so the label value stays a string rather than an integer.
        # NOTE(review): presumably the label a Grafana dashboard sidecar
        # selects on — confirm against the Grafana deployment.
        grafana_dashboard: "1"
generatorOptions:
  # Keeps the generated ConfigMap name stable (no content-hash suffix).
  disableNameSuffixHash: true
|
|
@ -0,0 +1,64 @@
|
||||||
|
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
# Alerting rules over smartctl_* metrics. Every rule must hold for 15m before
# firing and is labelled severity=critical.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: smartctl-exporter-rules
spec:
  groups:
    - name: smartctl-exporter.rules
      rules:
        - alert: SmartDeviceHighTemperature
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              has a temperature higher than 65°C.
          # NOTE(review): no label filter is applied, so every
          # smartctl_device_temperature series is compared against 65. If the
          # exporter also publishes threshold-type readings under this metric,
          # restrict the selector to the current reading — confirm against the
          # exporter's metric labels.
          expr: smartctl_device_temperature > 65
          for: 15m
          labels:
            severity: critical
        - alert: SmartDeviceTestFailed
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              did not pass its SMART test.
          # Fires when either status metric reports anything other than 1.
          expr: |
            (
              smartctl_device_smart_status != 1
              or
              smartctl_device_status != 1
            )
          for: 15m
          labels:
            severity: critical
        - alert: SmartDeviceCriticalWarning
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              is in a critical state.
          expr: smartctl_device_critical_warning != 0
          for: 15m
          labels:
            severity: critical
        - alert: SmartDeviceMediaErrors
          annotations:
            summary: >-
              Mounted drive {{ $labels.device }} on device {{ $labels.instance }}
              has media errors.
          # Devices whose name starts with "nvme" are excluded from this rule.
          expr: smartctl_device_media_errors{device!~"^nvme.+"} != 0
          for: 15m
          labels:
            severity: critical
        # NOTE(review): "Threadhold" in the alert name is a misspelling of
        # "Threshold". The name is left untouched because alert names are
        # matched by routes and silences; rename deliberately if nothing
        # depends on it.
        - alert: SmartDeviceAvailableSpareUnderThreadhold
          annotations:
            summary: >-
              Device {{ $labels.device }} on instance {{ $labels.instance }}
              is under available spare threshold.
          expr: smartctl_device_available_spare_threshold > smartctl_device_available_spare
          for: 15m
          labels:
            severity: critical
        - alert: SmartDeviceInterfaceSlow
          annotations:
            summary: >-
              Device {{ $labels.device }} on instance {{ $labels.instance }}
              interface is slower than it should be.
          # Compares the current and max speed series of the same device,
          # joined on (device, instance, namespace, pod).
          expr: |
            smartctl_device_interface_speed{speed_type="current"} != on(device, instance, namespace, pod) smartctl_device_interface_speed{speed_type="max"}
          for: 15m
          labels:
            severity: critical
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
20
kubernetes/apps/observability/smartcl-exporter/ks.yaml
Normal file
20
kubernetes/apps/observability/smartcl-exporter/ks.yaml
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the smartctl-exporter app directory from
# the "theshire" GitRepository into the observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # Anchored so the same value is reused for the app.kubernetes.io/name label.
  name: &app smartctl-exporter
  namespace: flux-system
spec:
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  # NOTE(review): the commit's file listing shows this manifest under
  # ".../observability/smartcl-exporter/ks.yaml" (no "t"), while this path and
  # the parent resources entry use "smartctl-exporter". Confirm the directory
  # name on disk matches, otherwise the parent kustomization will not find it.
  path: ./kubernetes/apps/observability/smartctl-exporter/app
  prune: true
  sourceRef:
    kind: GitRepository
    name: theshire
  wait: false
  interval: 30m
  retryInterval: 1m
|
Loading…
Reference in a new issue