Add alert rules to prometheus for minio.

This commit is contained in:
Joseph Hanson 2023-10-31 14:56:33 -05:00
parent 5f71ae7916
commit 7116d060be
Signed by: jahanson
SSH key fingerprint: SHA256:vy6dKBECV522aPAwklFM3ReKAVB086rT3oWwiuiFG7o
3 changed files with 39 additions and 0 deletions

View file

@ -7,3 +7,4 @@ resources:
- ./externalsecret.yaml - ./externalsecret.yaml
- ./helmrelease.yaml - ./helmrelease.yaml
- ./scrapeconfigs - ./scrapeconfigs
- ./prometheusrules

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
- ./minio.yaml

View file

@ -0,0 +1,31 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.devbu.io/monitoring.coreos.com/prometheusrule_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: miscellaneous-rules
namespace: monitoring
labels:
prometheus: k8s
role: alert-rules
spec:
groups:
- name: minio-alerts
rules:
- alert: NodesOffline
expr: avg_over_time(minio_cluster_nodes_offline_total{job="minio-job"}[5m]) > 0
for: 10m
labels:
severity: warn
annotations:
summary: "Node down in MinIO deployment"
description: "Node(s) in cluster {{ $labels.instance }} offline for more than 5 minutes"
- alert: DisksOffline
expr: avg_over_time(minio_cluster_disk_offline_total{job="minio-job"}[5m]) > 0
for: 10m
labels:
severity: warn
annotations:
summary: "Disks down in MinIO deployment"
description: "Disks(s) in cluster {{ $labels.instance }} offline for more than 5 minutes"