# Source: theshire/kubernetes/apps/observability/victoria-metrics/app/helmrelease.yaml (202 lines, 5.8 KiB, YAML)

---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
# NOTE(review): the schema above is the app-template v2beta2 schema, while this
# manifest uses apiVersion helm.toolkit.fluxcd.io/v2 and a different chart —
# confirm the editor schema is the intended one.
#
# Flux HelmRelease that installs the victoria-metrics-k8s-stack chart from the
# `victoria-metrics` HelmRepository in `flux-system`. The values below enable
# the VM operator, vmsingle, vmalert, vmagent and kubelet scraping, and select
# which of the chart's default rule groups are created.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: victoria-metrics
spec:
  interval: 30m
  chart:
    spec:
      chart: victoria-metrics-k8s-stack
      version: 0.25.8
      sourceRef:
        kind: HelmRepository
        name: victoria-metrics
        namespace: flux-system
  values:
    fullnameOverride: victoria-metrics

    # VM Operator deployment
    victoria-metrics-operator:
      enabled: true
      operator:
        # disable_prometheus_converter: false  # Keep the converter enabled so Prometheus rules are synced to VM rules
        enable_converter_ownership: true  # Required to allow VM to remove VM rules it imports if a prometheus rule is deleted

    # Single-binary vm cluster
    vmsingle:
      enabled: true
      spec:
        extraArgs:
          dedup.minScrapeInterval: 30s
          maxLabelsPerTimeseries: "90"
          search.minStalenessInterval: 5m
          vmalert.proxyURL: http://vmalert-victoria-metrics.observability.svc.cluster.local:8080
        retentionPeriod: 1y
        storage:
          storageClassName: "openebs-zfs"
          resources:
            requests:
              storage: "50Gi"
          accessModes:
            - ReadWriteOnce
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hosts:
          - vm.jahanson.tech

    # VM Alerting (however, this just watches & passes alerts to alertmanager)
    vmalert:
      enabled: true
      spec:
        replicaCount: 1
        resources:
          requests:
            cpu: 50m
            memory: 128Mi
          limits:
            cpu: 150m
            memory: 256Mi
        topologySpreadConstraints:
          - maxSkew: 1
            topologyKey: kubernetes.io/hostname
            whenUnsatisfiable: DoNotSchedule
            labelSelector:
              matchLabels:
                app.kubernetes.io/name: vmalert
        extraArgs:
          external.url: https://vmalert.jahanson.tech
        notifiers:
          - url: http://alertmanager.observability.svc.cluster.local:9093
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hosts:
          - vmalert.jahanson.tech

    # VM Data scraping
    vmagent:
      enabled: true
      spec:
        replicaCount: 1
        shardCount: 2
        scrapeInterval: 30s
        externalLabels:
          cluster: main
        resources:
          requests:
            cpu: 50m
            memory: 256Mi
          limits:
            cpu: 400m
            memory: 512Mi
        topologySpreadConstraints:
          - maxSkew: 1
            topologyKey: kubernetes.io/hostname
            whenUnsatisfiable: DoNotSchedule
            labelSelector:
              matchLabels:
                app.kubernetes.io/name: vmagent
        # Secret name/key pair holding extra scrape configs
        additionalScrapeConfigs:
          name: vm-additional-scrape-configs
          key: prometheus-additional.yaml
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hosts:
          - vmagent.jahanson.tech

    # Extra slack templates
    # NOTE(review): the chart appears to read this toggle under
    # `alertmanager.monzoTemplate` — confirm this top-level placement is intended.
    monzoTemplate:
      enabled: false

    # Scrape configs
    kubelet:
      enabled: true
      spec:
        interval: 30s
        # drop high cardinality label and useless metrics for cadvisor and kubelet
        metricRelabelConfigs:
          - action: labeldrop
            regex: (uid|pod_uid|id)
          - action: labeldrop
            regex: (name)
          - action: drop
            source_labels: [__name__]
            regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
          - action: drop
            source_labels: [__name__]
            regex: (container_tasks_state|container_memory_failures_total)
          - action: drop
            source_labels: [__name__]
            regex: (container_blkio_device_usage_total)
          - action: drop
            source_labels: [__name__]
            regex: (prober_probe_duration_seconds_bucket)
        relabelConfigs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - sourceLabels: [__metrics_path__]
            targetLabel: metrics_path
          - targetLabel: "job"
            replacement: "kubelet"
    # kubeApiServer:
    #   enabled: false
    # kubeControllerManager:
    #   enabled: false
    # kubeProxy:
    #   enabled: false
    # kubeScheduler:
    #   enabled: false
    # kubeEtcd:
    #   enabled: false

    # Enable deployment of kube-state-metrics
    # kube-state-metrics:
    #   enabled: false

    # Enable deployment of prometheus-node-exporter
    # prometheus-node-exporter:
    #   enabled: false

    # Enable deployment of grafana
    # defaultDashboardsEnabled: false
    # grafana:
    #   enabled: false

    # Enable deployment of alertmanager
    # alertmanager:
    #   enabled: false

    # Prepared sets of default rules.
    # Adjust to the scraping functions you have enabled,
    # i.e. if you don't have kubeApiServer set up & enabled, disable
    # the kubeApiserver rules below.
    defaultRules:
      create: true
      rules:
        etcd: false
        general: true
        k8s: true
        kubeApiserver: true
        kubeApiserverAvailability: true
        kubeApiserverBurnrate: true
        kubeApiserverHistogram: true
        kubeApiserverSlos: true
        kubelet: true
        kubePrometheusGeneral: true
        kubePrometheusNodeRecording: true
        kubernetesApps: true
        kubernetesResources: true
        kubernetesStorage: true
        kubernetesSystem: true
        kubeScheduler: false
        kubeStateMetrics: true
        network: true
        node: true
        vmagent: true
        vmsingle: false
        vmhealth: true
        alertmanager: false