theshire/kubernetes/apps/flux-system/add-ons/monitoring/prometheusrule.yaml
2024-01-11 15:03:54 -06:00

32 lines
1.1 KiB
YAML

---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
# PrometheusRule "flux-rules" in the flux-system namespace.
# Declares one rule group (flux.rules) with two critical alerts.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: flux-rules
  namespace: flux-system
spec:
  groups:
    - name: flux.rules
      rules:
        # Fires when, for 15 minutes, no `up` series whose job label
        # matches ".*flux-system.*" has the value 1.
        - alert: FluxComponentAbsent
          annotations:
            summary: Flux component has disappeared from Prometheus target discovery.
          expr: |
            absent(up{job=~".*flux-system.*"} == 1)
          for: 15m
          labels:
            severity: critical
        # Fires when, per (namespace, name, kind), the Ready="False" series
        # plus twice the "Deleted" series equals exactly 1 for 15 minutes.
        # Presumably both series are 0/1 gauges, so a deleted object sums
        # to 2 or 3 and is excluded -- TODO confirm against the exporter.
        # NOTE(review): newer Flux releases may no longer export
        # gotk_reconcile_condition -- confirm for the deployed version.
        - alert: FluxReconciliationFailure
          annotations:
            summary: >-
              {{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
              has been failing for more than 15 minutes.
          expr: |
            max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
            +
            on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
            by (namespace, name, kind)) * 2 == 1
          for: 15m
          labels:
            severity: critical