---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
#
# Flux HelmRelease that installs the victoria-metrics-k8s-stack chart from the
# `victoria-metrics` HelmRepository in the flux-system namespace.
#
# NOTE(review): the schema hint above targets v2beta2 while apiVersion below
# is v2beta1 - confirm which API version the cluster serves and align the two.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: victoria-metrics
spec:
  # How often the release is reconciled.
  interval: 30m
  chart:
    spec:
      chart: victoria-metrics-k8s-stack
      version: 0.25.0
      sourceRef:
        kind: HelmRepository
        name: victoria-metrics
        namespace: flux-system
  values:
    fullnameOverride: victoria-metrics

    # VM Operator deployment
    victoria-metrics-operator:
      enabled: true
      operator:
        # Keep the converter enabled so Prometheus rules are synced to VM rules.
        # disable_prometheus_converter: false
        # Required so VM can remove the VM rules it imported when the
        # corresponding Prometheus rule is deleted.
        enable_converter_ownership: true

    # Single-binary vm cluster
    vmsingle:
      enabled: true
      spec:
        extraArgs:
          dedup.minScrapeInterval: 30s
          maxLabelsPerTimeseries: "90"
          search.minStalenessInterval: 5m
          vmalert.proxyURL: http://vmalert-victoria-metrics.observability.svc.cluster.local:8080
        retentionPeriod: 1y
        storage:
          storageClassName: "openebs-zfs"
          resources:
            requests:
              storage: "50Gi"
          accessModes:
            - ReadWriteOnce
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hosts:
          - vm.jahanson.tech

    # VM Alerting (however, this just watches & passes alerts to alertmanager)
    vmalert:
      enabled: true
      spec:
        replicaCount: 1
        resources:
          requests:
            cpu: 50m
            memory: 128Mi
          limits:
            cpu: 150m
            memory: 256Mi
        topologySpreadConstraints:
          - maxSkew: 1
            topologyKey: kubernetes.io/hostname
            whenUnsatisfiable: DoNotSchedule
            labelSelector:
              matchLabels:
                app.kubernetes.io/name: vmalert
        extraArgs:
          external.url: https://vmalert.jahanson.tech
        notifiers:
          - url: http://alertmanager.observability.svc.cluster.local:9093
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hosts:
          - vmalert.jahanson.tech

    # VM Data scraping
    vmagent:
      enabled: true
      spec:
        replicaCount: 1
        shardCount: 2
        scrapeInterval: 30s
        externalLabels:
          cluster: main
        resources:
          requests:
            cpu: 50m
            memory: 256Mi
          limits:
            cpu: 400m
            memory: 512Mi
        topologySpreadConstraints:
          - maxSkew: 1
            topologyKey: kubernetes.io/hostname
            whenUnsatisfiable: DoNotSchedule
            labelSelector:
              matchLabels:
                app.kubernetes.io/name: vmagent
        # Reference (name + key) to the extra scrape configuration.
        additionalScrapeConfigs:
          name: vm-additional-scrape-configs
          key: prometheus-additional.yaml
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hosts:
          - vmagent.jahanson.tech

    # Extra slack templates
    # NOTE(review): placed at the top level of `values` based on the section
    # layout - confirm the expected nesting against the chart's values.
    monzoTemplate:
      enabled: false

    # Scrape configs
    kubelet:
      enabled: true
      spec:
        interval: 30s
        # Drop high-cardinality labels and useless metrics for cadvisor and
        # kubelet.
        metricRelabelConfigs:
          - action: labeldrop
            regex: (uid|pod_uid|id)
          - action: labeldrop
            regex: (name)
          - action: drop
            source_labels: [__name__]
            regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
          - action: drop
            source_labels: [__name__]
            regex: (container_tasks_state|container_memory_failures_total)
          - action: drop
            source_labels: [__name__]
            regex: (container_blkio_device_usage_total)
          - action: drop
            source_labels: [__name__]
            regex: (prober_probe_duration_seconds_bucket)
        relabelConfigs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - sourceLabels: [__metrics_path__]
            targetLabel: metrics_path
          - targetLabel: "job"
            replacement: "kubelet"
    # kubeApiServer:
    #   enabled: false
    # kubeControllerManager:
    #   enabled: false
    # kubeProxy:
    #   enabled: false
    # kubeScheduler:
    #   enabled: false
    # kubeEtcd:
    #   enabled: false

    # Enable deployment of kube-state-metrics
    # kube-state-metrics:
    #   enabled: false

    # Enable deployment of prometheus-node-exporter
    # prometheus-node-exporter:
    #   enabled: false

    # Enable deployment of grafana
    # defaultDashboardsEnabled: false
    # grafana:
    #   enabled: false

    # Enable deployment of alertmanager
    # alertmanager:
    #   enabled: false

    # Prepared sets of default rules.
    # Adjust these to match the scraping functions you have enabled,
    # i.e. if you don't have kube-apiserver scraping set up and enabled,
    # disable the kubeApiserver rules below.
    defaultRules:
      create: true
      rules:
        etcd: false
        general: true
        k8s: true
        kubeApiserver: true
        kubeApiserverAvailability: true
        kubeApiserverBurnrate: true
        kubeApiserverHistogram: true
        kubeApiserverSlos: true
        kubelet: true
        kubePrometheusGeneral: true
        kubePrometheusNodeRecording: true
        kubernetesApps: true
        kubernetesResources: true
        kubernetesStorage: true
        kubernetesSystem: true
        kubeScheduler: false
        kubeStateMetrics: true
        network: true
        node: true
        vmagent: true
        vmsingle: false
        vmhealth: true
        alertmanager: false