---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
# NOTE(review): the schema URL above targets v2beta2 while apiVersion below is
# helm.toolkit.fluxcd.io/v2 -- confirm whether a matching v2 schema should be
# referenced instead.
#
# Flux HelmRelease that installs the kube-prometheus-stack chart (pinned to
# 60.3.0) from the prometheus-community HelmRepository in flux-system.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
spec:
  interval: 30m
  timeout: 15m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 60.3.0
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  install:
    crds: CreateReplace
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    crds: CreateReplace
    remediation:
      strategy: rollback
      retries: 3
  values:
    crds:
      enabled: true
    cleanPrometheusOperatorObjectNames: true

    alertmanager:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: internal-nginx
        hosts:
          - &host alertmanager.jahanson.tech
        tls:
          - hosts:
              - *host
      alertmanagerSpec:
        replicas: 1
        useExistingSecret: true
        configSecret: alertmanager-secret
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: openebs-zfs
              resources:
                requests:
                  storage: 1Gi

    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Drop high cardinality labels
          - action: labeldrop
            regex: (uid)
          - action: labeldrop
            regex: (id|name)
          # Long single-line regex kept verbatim; folding it would change the value.
          - action: drop
            sourceLabels: ["__name__"]
            regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)

    kubeApiServer:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Drop high cardinality labels
          - action: drop
            sourceLabels: ["__name__"]
            regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
          - action: drop
            sourceLabels: ["__name__"]
            regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)

    # The endpoint list anchored as &cp is reused verbatim by kubeEtcd and
    # kubeScheduler below.
    kubeControllerManager:
      enabled: true
      endpoints: &cp
        - 10.1.1.61
    kubeEtcd:
      enabled: true
      endpoints: *cp
    kubeScheduler:
      enabled: true
      endpoints: *cp
    kubeProxy:
      enabled: false

    prometheus:
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        pathType: Prefix
        hosts:
          # &host is re-anchored here; the alias under tls resolves to this
          # (nearest preceding) definition, not the alertmanager one.
          - &host prometheus.jahanson.tech
        tls:
          - hosts:
              - *host
      thanosService:
        enabled: true
      thanosServiceMonitor:
        enabled: true
      # thanosServiceExternal:
      #   enabled: true
      #   type: LoadBalancer
      #   annotations:
      #     external-dns.alpha.kubernetes.io/hostname: thanos.jahanson.tech
      #     io.cilium/lb-ipam-ips: 10.45.0.6
      #   externalTrafficPolicy: Cluster
      prometheusSpec:
        podMetadata:
          annotations:
            # Secret name is anchored so the Thanos objectStorageConfig below
            # references the same value.
            secret.reloader.stakater.com/reload: &secret thanos-objstore-config
        replicas: 1
        replicaExternalLabelName: __replica__
        scrapeInterval: 1m  # Must match interval in Grafana Helm chart
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelectorNilUsesHelmValues: false
        probeSelectorNilUsesHelmValues: false
        scrapeConfigSelectorNilUsesHelmValues: false
        enableAdminAPI: true
        walCompression: true
        enableFeatures:
          - auto-gomemlimit
          - memory-snapshot-on-shutdown
          - new-service-discovery-manager
        image:
          registry: quay.io
          repository: prometheus/prometheus
          tag: v2.51.0-dedupelabels
        thanos:
          # NOTE(review): ${THANOS_VERSION} is presumably filled in by Flux
          # post-build variable substitution -- confirm where it is defined.
          image: quay.io/thanos/thanos:${THANOS_VERSION}
          version: "${THANOS_VERSION#v}"
          objectStorageConfig:
            existingSecret:
              name: *secret
              key: config
        retention: 2d
        retentionSize: 15GB
        externalLabels:
          cluster: main
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: openebs-zfs
              resources:
                requests:
                  storage: 20Gi

    nodeExporter:
      enabled: true
    prometheus-node-exporter:
      fullnameOverride: node-exporter
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into the kubernetes_node label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node

    kubeStateMetrics:
      enabled: true
    kube-state-metrics:
      fullnameOverride: kube-state-metrics
      metricLabelsAllowlist:
        - pods=[*]
        - deployments=[*]
        - persistentvolumeclaims=[*]
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into the kubernetes_node label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node

    # NOTE(review): the bundled Grafana is disabled while dashboards are still
    # force-deployed -- presumably consumed by a separately managed Grafana;
    # confirm.
    grafana:
      enabled: false
      forceDeployDashboards: true
      sidecar:
        dashboards:
          annotations:
            grafana_folder: Kubernetes
          multicluster:
            etcd:
              enabled: true