theshire/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml

190 lines
5.3 KiB
YAML

---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: kube-prometheus-stack
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: kube-prometheus-stack
version: 58.6.0
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: flux-system
install:
crds: CreateReplace
remediation:
retries: 3
upgrade:
cleanupOnFail: true
crds: CreateReplace
remediation:
strategy: rollback
retries: 3
values:
crds:
enabled: true
cleanPrometheusOperatorObjectNames: true
alertmanager:
ingress:
enabled: true
pathType: Prefix
ingressClassName: internal-nginx
hosts:
- &host alertmanager.jahanson.tech
tls:
- hosts:
- *host
alertmanagerSpec:
replicas: 1
useExistingSecret: true
configSecret: alertmanager-secret
storage:
volumeClaimTemplate:
spec:
storageClassName: openebs-zfs
resources:
requests:
storage: 1Gi
kubelet:
enabled: true
serviceMonitor:
metricRelabelings:
# Drop high cardinality labels
- action: labeldrop
regex: (uid)
- action: labeldrop
regex: (id|name)
- action: drop
sourceLabels: ["__name__"]
regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
kubeApiServer:
enabled: true
serviceMonitor:
metricRelabelings:
# Drop high cardinality labels
- action: drop
sourceLabels: ["__name__"]
regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
- action: drop
sourceLabels: ["__name__"]
regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
kubeControllerManager:
enabled: true
endpoints: &cp
- 10.1.1.61
kubeEtcd:
enabled: true
endpoints: *cp
kubeScheduler:
enabled: true
endpoints: *cp
kubeProxy:
enabled: false
prometheus:
ingress:
enabled: true
ingressClassName: internal-nginx
pathType: Prefix
hosts:
- &host prometheus.jahanson.tech
tls:
- hosts:
- *host
thanosService:
enabled: true
thanosServiceMonitor:
enabled: true
# thanosServiceExternal:
# enabled: true
# type: LoadBalancer
# annotations:
# external-dns.alpha.kubernetes.io/hostname: thanos.jahanson.tech
# io.cilium/lb-ipam-ips: 10.45.0.6
# externalTrafficPolicy: Cluster
prometheusSpec:
podMetadata:
annotations:
secret.reloader.stakater.com/reload: &secret thanos-objstore-config
replicas: 1
replicaExternalLabelName: __replica__
scrapeInterval: 1m # Must match interval in Grafana Helm chart
ruleSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
probeSelectorNilUsesHelmValues: false
scrapeConfigSelectorNilUsesHelmValues: false
enableAdminAPI: true
walCompression: true
enableFeatures:
- auto-gomemlimit
- memory-snapshot-on-shutdown
- new-service-discovery-manager
image:
registry: quay.io
repository: prometheus/prometheus
tag: v2.51.0-dedupelabels
thanos:
image: quay.io/thanos/thanos:${THANOS_VERSION}
version: "${THANOS_VERSION#v}"
objectStorageConfig:
existingSecret:
name: *secret
key: config
retention: 2d
retentionSize: 15GB
externalLabels:
cluster: main
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: openebs-zfs
resources:
requests:
storage: 20Gi
nodeExporter:
enabled: true
prometheus-node-exporter:
fullnameOverride: node-exporter
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
kubeStateMetrics:
enabled: true
kube-state-metrics:
fullnameOverride: kube-state-metrics
metricLabelsAllowlist:
- pods=[*]
- deployments=[*]
- persistentvolumeclaims=[*]
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
grafana:
enabled: false
forceDeployDashboards: true
sidecar:
dashboards:
annotations:
grafana_folder: Kubernetes
multicluster:
etcd:
enabled: true