Adding the first part of the monitoring stack.
This commit is contained in:
parent
8cc09e36ae
commit
fea9d8227d
19 changed files with 743 additions and 13 deletions
|
@ -0,0 +1,32 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
# Renders the `grafana-secret` Secret from the `grafana` and `cloudnative-pg`
# items of the `onepassword-connect` ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: grafana
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: onepassword-connect
  dataFrom:
    - extract:
        key: grafana
    - extract:
        key: cloudnative-pg
  target:
    name: grafana-secret
    template:
      engineVersion: v2
      data:
        # Grafana database settings (GF_DATABASE_*).
        GF_DATABASE_TYPE: postgres
        GF_DATABASE_HOST: postgres16-rw.database.svc.cluster.local:5432
        GF_DATABASE_NAME: grafana
        GF_DATABASE_USER: "{{ .GRAFANA_POSTGRES_USER }}"
        GF_DATABASE_PASSWORD: "{{ .GRAFANA_POSTGRES_PASS }}"
        GF_DATABASE_SSL_MODE: disable
        # Grafana generic OAuth client secret.
        GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "{{ .GRAFANA_OAUTH_CLIENT_SECRET }}"
        # INIT_POSTGRES_* carry the same database name, user and password as
        # the GF_DATABASE_* entries above, plus the superuser password.
        INIT_POSTGRES_DBNAME: grafana
        INIT_POSTGRES_HOST: postgres16-rw.database.svc.cluster.local
        INIT_POSTGRES_USER: "{{ .GRAFANA_POSTGRES_USER }}"
        INIT_POSTGRES_PASS: "{{ .GRAFANA_POSTGRES_PASS }}"
        INIT_POSTGRES_SUPER_PASS: "{{ .POSTGRES_SUPER_PASS }}"
|
|
@ -0,0 +1,86 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/postgres-operator.crunchydata.com/postgrescluster_v1beta1.json
# Crunchy PGO cluster named after the Flux-substituted ${APP} variable.
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
  name: "${APP}"
spec:
  postgresVersion: 16

  # pgBackRest repository used as the cluster's data source (stanza `db`,
  # repo1 on S3); path and bucket match the `backups` section below.
  dataSource:
    pgbackrest:
      stanza: db
      configuration:
        - secret:
            name: pgo-s3-creds
      global:
        repo1-path: "/${APP}/repo1"
        repo1-s3-uri-style: path
      repo:
        name: repo1
        s3:
          bucket: crunchy-postgres
          endpoint: s3.hsn.dev
          region: us-east-1

  patroni:
    dynamicConfiguration:
      synchronous_mode: true
      postgresql:
        synchronous_commit: "on"
        pg_hba:
          # Non-TLS connections are only accepted from 10.32.0.0/16.
          - hostnossl all all 10.32.0.0/16 md5
          - hostssl all all all md5

  instances:
    - name: postgres
      replicas: 2
      metadata:
        labels:
          app.kubernetes.io/name: pgo-${APP}
      dataVolumeClaimSpec:
        storageClassName: openebs-hostpath
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 5Gi
      # At most one instance of skew per node; pods stay unscheduled rather
      # than violating the spread.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: DoNotSchedule
          labelSelector:
            matchLabels:
              postgres-operator.crunchydata.com/cluster: ${APP}
              postgres-operator.crunchydata.com/data: postgres

  users:
    - name: grafana
      databases:
        - grafana
      # NOTE(review): the application user is created with SUPERUSER — confirm
      # that this level of privilege is really required.
      options: SUPERUSER
      password:
        type: AlphaNumeric

  backups:
    pgbackrest:
      configuration:
        - secret:
            name: pgo-s3-creds
      global:
        archive-push-queue-max: 4GiB
        repo1-retention-full: "14"
        repo1-retention-full-type: time
        repo1-path: "/${APP}/repo1"
        repo1-s3-uri-style: path
      manual:
        repoName: repo1
        options:
          - --type=full
      metadata:
        labels:
          app.kubernetes.io/name: pgo-${APP}-backup
      repos:
        - name: repo1
          schedules:
            # Full backup Sundays at 01:00, differential Monday-Saturday at 01:00.
            full: "0 1 * * 0"
            differential: "0 1 * * 1-6"
          s3:
            bucket: crunchy-postgres
            endpoint: s3.hsn.dev
            region: us-east-1
|
27
kubernetes/apps/observability/grafana/ks.yaml
Normal file
27
kubernetes/apps/observability/grafana/ks.yaml
Normal file
|
@ -0,0 +1,27 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that applies ./grafana/app into the observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: grafana
  namespace: flux-system
spec:
  sourceRef:
    kind: GitRepository
    name: homelab
  path: ./kubernetes/apps/observability/grafana/app
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: grafana
  dependsOn:
    - name: crunchy-postgres-operator
    - name: external-secrets-stores
  prune: true
  wait: false
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  postBuild:
    substitute:
      # Consumed as ${APP} by the manifests under the path above.
      APP: grafana
|
|
@ -0,0 +1,22 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
# Renders `alertmanager-secret` by templating the `alertmanager.yaml` key of the
# `alertmanager-config-tpl` ConfigMap with values from the `pushover` item.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: alertmanager
spec:
  refreshInterval: 5m
  secretStoreRef:
    kind: ClusterSecretStore
    name: onepassword-connect
  dataFrom:
    - extract:
        key: pushover
  target:
    name: alertmanager-secret
    template:
      templateFrom:
        - configMap:
            name: alertmanager-config-tpl
            items:
              - key: alertmanager.yaml
|
|
@ -0,0 +1,203 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
# kube-prometheus-stack: Prometheus (with Thanos sidecar), Alertmanager,
# node-exporter and kube-state-metrics. The bundled Grafana is disabled; only
# its dashboards are deployed.
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
spec:
  interval: 30m
  timeout: 15m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 56.13.0
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  install:
    crds: CreateReplace
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    crds: CreateReplace
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  dependsOn:
    - name: openebs
      namespace: openebs-system
    - name: thanos
      namespace: observability
  values:
    crds:
      enabled: true
    cleanPrometheusOperatorObjectNames: true
    alertmanager:
      ingress:
        enabled: true
        pathType: Prefix
        ingressClassName: internal-nginx
        hosts:
          - &host alertmanager.jahanson.tech
        tls:
          - hosts:
              - *host
      alertmanagerSpec:
        replicas: 2
        # Configuration comes from the Secret rendered by the alertmanager
        # ExternalSecret rather than from chart values.
        useExistingSecret: true
        configSecret: alertmanager-secret
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: openebs-hostpath
              resources:
                requests:
                  storage: 1Gi
    kubelet:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Drop high cardinality labels
          - action: labeldrop
            regex: (uid)
          - action: labeldrop
            regex: (id|name)
          - action: drop
            sourceLabels: ["__name__"]
            regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
    kubeApiServer:
      enabled: true
      serviceMonitor:
        metricRelabelings:
          # Drop high cardinality labels
          - action: drop
            sourceLabels: ["__name__"]
            regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
          - action: drop
            sourceLabels: ["__name__"]
            regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
    kubeControllerManager:
      enabled: true
      # Endpoint list shared with kubeEtcd and kubeScheduler via the alias.
      endpoints: &cp
        - 192.168.1.61
        - 192.168.1.62
        - 192.168.1.63
    kubeEtcd:
      enabled: true
      endpoints: *cp
    kubeScheduler:
      enabled: true
      endpoints: *cp
    kubeProxy:
      enabled: false
    prometheus:
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        pathType: Prefix
        hosts:
          - &host prometheus.jahanson.tech
        tls:
          - hosts:
              - *host
      thanosService:
        enabled: true
      thanosServiceMonitor:
        enabled: true
      thanosServiceExternal:
        enabled: true
        type: LoadBalancer
        annotations:
          external-dns.alpha.kubernetes.io/hostname: thanos.jahanson.tech
          io.cilium/lb-ipam-ips: 10.45.0.6
        externalTrafficPolicy: Cluster
      prometheusSpec:
        replicas: 2
        replicaExternalLabelName: __replica__
        ruleSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelectorNilUsesHelmValues: false
        probeSelectorNilUsesHelmValues: false
        scrapeConfigSelectorNilUsesHelmValues: false
        enableAdminAPI: true
        walCompression: true
        enableFeatures:
          - auto-gomaxprocs
          - memory-snapshot-on-shutdown
          - new-service-discovery-manager
        thanos:
          # THANOS_VERSION is substituted by the Flux Kustomization; the
          # `version` field receives it with the leading "v" stripped.
          image: quay.io/thanos/thanos:${THANOS_VERSION}
          version: "${THANOS_VERSION#v}"
          objectStorageConfig:
            existingSecret:
              # Must be the Secret rendered by the thanos ExternalSecret
              # (target `thanos-s3-secret`, data key `objstore.yml`). The
              # thanos chart is given `existingObjstoreSecret`, so it does not
              # generate an objstore Secret of its own.
              name: thanos-s3-secret
              key: objstore.yml
        retention: 2d
        retentionSize: 15GB
        externalLabels:
          cluster: main
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: openebs-hostpath
              resources:
                requests:
                  storage: 20Gi
    nodeExporter:
      enabled: true
    prometheus-node-exporter:
      fullnameOverride: node-exporter
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into the `kubernetes_node` label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
    kubeStateMetrics:
      enabled: true
    kube-state-metrics:
      fullnameOverride: kube-state-metrics
      metricLabelsAllowlist:
        - pods=[*]
        - deployments=[*]
        - persistentvolumeclaims=[*]
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into the `kubernetes_node` label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - __meta_kubernetes_pod_node_name
              targetLabel: kubernetes_node
    grafana:
      enabled: false
      forceDeployDashboards: true
      sidecar:
        dashboards:
          multicluster:
            etcd:
              enabled: true
  postRenderers:
    # Annotate every rendered dashboard ConfigMap (label grafana_dashboard=1)
    # with the Grafana folder it belongs to.
    - kustomize:
        patches:
          - target:
              version: v1
              kind: ConfigMap
              labelSelector: grafana_dashboard in (1)
            patch: |-
              apiVersion: v1
              kind: ConfigMap
              metadata:
                name: not-used
                namespace: not-used
                annotations:
                  grafana_folder: Kubernetes
|
|
@ -0,0 +1,15 @@
|
|||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Resources of the kube-prometheus-stack app plus the Alertmanager config
# template ConfigMap.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./externalsecret.yaml
  - ./helmrelease.yaml
  - ./prometheusrules
  - ./scrapeconfigs
generatorOptions:
  # Keep the generated ConfigMap name stable (no content-hash suffix).
  disableNameSuffixHash: true
configMapGenerator:
  - name: alertmanager-config-tpl
    files:
      - alertmanager.yaml=./resources/alertmanager.yaml
|
|
@ -0,0 +1,6 @@
|
|||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# PrometheusRule manifests shipped with the kube-prometheus-stack app.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./prometheusrule.yaml
|
|
@ -0,0 +1,37 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
# Miscellaneous critical alerts: Docker Hub pull volume, OOM kills, ZFS pool state.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: miscellaneous-rules
  labels:
    prometheus: k8s
    role: alert-rules
spec:
  groups:
    - name: dockerhub
      rules:
        # More than 100 containers from docker.io images seen in the last 30s.
        - alert: BootstrapRateLimitRisk
          annotations:
            summary: Kubernetes cluster at risk of being rate limited by dockerhub on bootstrap
          expr: count(time() - container_last_seen{image=~"(docker.io).*",container!=""} < 30) > 100
          for: 15m
          labels:
            severity: critical
    - name: oom
      rules:
        # Container restarted within the last 10 minutes and its last
        # termination reason was OOMKilled.
        - alert: OOMKilled
          annotations:
            summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
          expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
          labels:
            severity: critical
    - name: zfs
      rules:
        # Any zpool reporting a state other than "online".
        - alert: ZfsUnexpectedPoolState
          annotations:
            summary: ZFS pool {{$labels.zpool}} on {{$labels.instance}} is in an unexpected state {{$labels.state}}
          expr: node_zfs_zpool_state{state!="online"} > 0
          for: 15m
          labels:
            severity: critical
|
|
@ -0,0 +1,68 @@
|
|||
---
# Alertmanager configuration template. It is rendered by the alertmanager
# ExternalSecret, so single-brace-pair expressions are filled in at render time
# while the quoted-brace forms emit literal Alertmanager template delimiters.
global:
  resolve_timeout: 5m
route:
  receiver: pushover
  group_by:
    - alertname
    - job
  group_wait: 1m
  group_interval: 10m
  repeat_interval: 12h
  routes:
    # Watchdog goes to the heartbeat webhook every 5 minutes.
    - receiver: heartbeat
      group_interval: 5m
      group_wait: 0s
      matchers:
        - alertname =~ "Watchdog"
      repeat_interval: 5m
    # InfoInhibitor is routed to a receiver with no integrations.
    - receiver: "null"
      matchers:
        - alertname =~ "InfoInhibitor"
    - receiver: pushover
      continue: true
      matchers:
        - severity = "critical"
inhibit_rules:
  # A critical alert mutes the warning with the same alertname and namespace.
  - equal:
      - alertname
      - namespace
    source_matchers:
      - severity = "critical"
    target_matchers:
      - severity = "warning"
receivers:
  - name: heartbeat
    webhook_configs:
      - send_resolved: true
        url: "{{ .alertmanager_heartbeat_url }}"
  - name: "null"
  - name: pushover
    pushover_configs:
      - html: true
        # Compooters are hard
        # NOTE(review): nesting inside the message block was re-indented;
        # verify the rendered whitespace against the deployed template.
        message: |-
          {{ "{{-" }} range .Alerts {{ "}}" }}
            {{ "{{-" }} if ne .Annotations.description "" {{ "}}" }}
              {{ "{{" }} .Annotations.description {{ "}}" }}
            {{ "{{-" }} else if ne .Annotations.summary "" {{ "}}" }}
              {{ "{{" }} .Annotations.summary {{ "}}" }}
            {{ "{{-" }} else if ne .Annotations.message "" {{ "}}" }}
              {{ "{{" }} .Annotations.message {{ "}}" }}
            {{ "{{-" }} else {{ "}}" }}
              Alert description not available
            {{ "{{-" }} end {{ "}}" }}
            {{ "{{-" }} if gt (len .Labels.SortedPairs) 0 {{ "}}" }}
              <small>
                {{ "{{-" }} range .Labels.SortedPairs {{ "}}" }}
                  <b>{{ "{{" }} .Name {{ "}}" }}:</b> {{ "{{" }} .Value {{ "}}" }}
                {{ "{{-" }} end {{ "}}" }}
              </small>
            {{ "{{-" }} end {{ "}}" }}
          {{ "{{-" }} end {{ "}}" }}
        # Priority 1 while firing, 0 once resolved.
        priority: |-
          {{ "{{" }} if eq .Status "firing" {{ "}}" }}1{{ "{{" }} else {{ "}}" }}0{{ "{{" }} end {{ "}}" }}
        send_resolved: true
        sound: gamelan
        title: >-
          {{ "{{" }} .CommonLabels.alertname {{ "}}" }}
          [{{ "{{" }} .Status | toUpper {{ "}}" }}{{ "{{" }} if eq .Status "firing" {{ "}}" }}:{{ "{{" }} .Alerts.Firing | len {{ "}}" }}{{ "{{" }} end {{ "}}" }}]
        token: "{{ .alertmanager_token }}"
        url_title: View in Alertmanager
        user_key: "{{ .userkey_jahanson }}"
|
|
@ -0,0 +1,6 @@
|
|||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# ScrapeConfig manifests shipped with the kube-prometheus-stack app.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./node-exporter.yaml
|
|
@ -0,0 +1,11 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
# Static scrape target at 10.1.1.1:9100, scraped on /metrics.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
  name: node-exporter
spec:
  metricsPath: /metrics
  staticConfigs:
    - targets:
        - "10.1.1.1:9100"
|
27
kubernetes/apps/observability/kube-prometheus-stack/ks.yaml
Normal file
27
kubernetes/apps/observability/kube-prometheus-stack/ks.yaml
Normal file
|
@ -0,0 +1,27 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that applies ./kube-prometheus-stack/app into the
# observability namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: kube-prometheus-stack
  namespace: flux-system
spec:
  sourceRef:
    kind: GitRepository
    name: homelab
  path: ./kubernetes/apps/observability/kube-prometheus-stack/app
  targetNamespace: observability
  commonMetadata:
    labels:
      app.kubernetes.io/name: kube-prometheus-stack
  dependsOn:
    - name: external-secrets-stores
  prune: true
  wait: false
  interval: 30m
  retryInterval: 1m
  timeout: 15m
  postBuild:
    substitute:
      # renovate: datasource=docker depName=quay.io/thanos/thanos
      THANOS_VERSION: v0.34.1
|
17
kubernetes/apps/observability/kustomization.yaml
Normal file
17
kubernetes/apps/observability/kustomization.yaml
Normal file
|
@ -0,0 +1,17 @@
|
|||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Entry point for the observability namespace; commented-out entries are apps
# that are not enabled.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Pre Flux-Kustomizations
  - ./namespace.yaml
  # Flux-Kustomizations
  # - ./gatus/ks.yaml
  # - ./grafana/ks.yaml
  - ./kube-prometheus-stack/ks.yaml
  # - ./loki/ks.yaml
  # - ./smartctl-exporter/ks.yaml
  # - ./snmp-exporter/ks.yaml
  # NOTE(review): ./thanos/ks.yaml appears to be an empty file — confirm it is
  # populated before relying on this entry.
  - ./thanos/ks.yaml
  # - ./unpoller/ks.yaml
  # - ./vector/ks.yaml
|
8
kubernetes/apps/observability/namespace.yaml
Normal file
8
kubernetes/apps/observability/namespace.yaml
Normal file
|
@ -0,0 +1,8 @@
|
|||
---
# Namespace for the monitoring stack.
apiVersion: v1
kind: Namespace
metadata:
  name: observability
  labels:
    # Excludes the namespace from Flux pruning.
    kustomize.toolkit.fluxcd.io/prune: disabled
    pgo-enabled-hsn.dev: "true"
|
31
kubernetes/apps/observability/thanos/app/externalsecret.yaml
Normal file
31
kubernetes/apps/observability/thanos/app/externalsecret.yaml
Normal file
|
@ -0,0 +1,31 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
# Renders `thanos-s3-secret` (key `objstore.yml`) with the Thanos S3 object
# store configuration, built from the `Minio` item.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: thanos
  # No namespace is set here, matching the other manifests of this stack; the
  # previous hard-coded `monitoring` did not match the `observability`
  # namespace used by the rest of the monitoring stack.
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: onepassword-connect
  target:
    name: thanos-s3-secret
    creationPolicy: Owner
    template:
      engineVersion: v2
      data:
        objstore.yml: |-
          type: s3
          config:
            access_key: {{ .s3_thanos_access_key }}
            bucket: {{ .s3_thanos_bucket_name }}
            endpoint: {{ .s3_homelab_endpoint }}
            secret_key: {{ .s3_thanos_secret_key }}
  dataFrom:
    - extract:
        key: Minio
      rewrite:
        # Prefix every extracted key with `s3_`, as the template above expects.
        - regexp:
            source: "(.*)"
            target: "s3_$1"
|
132
kubernetes/apps/observability/thanos/app/helmrelease.yaml
Normal file
132
kubernetes/apps/observability/thanos/app/helmrelease.yaml
Normal file
|
@ -0,0 +1,132 @@
|
|||
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
# Thanos (bitnami chart): query, query-frontend, bucketweb, compactor,
# storegateway and ruler. Receive is disabled.
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
  name: thanos
spec:
  interval: 30m
  timeout: 15m
  chart:
    spec:
      chart: thanos
      version: 13.2.2
      sourceRef:
        kind: HelmRepository
        name: bitnami
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
  uninstall:
    keepHistory: false
  dependsOn:
    - name: openebs
      namespace: openebs-system
    - name: dragonfly-operator
      namespace: dragonfly-operator-system
    - name: rook-ceph-cluster
      namespace: rook-ceph
  values:
    # Object store configuration is read from the Secret rendered by the
    # thanos ExternalSecret.
    existingObjstoreSecret: thanos-s3-secret
    image:
      registry: quay.io
      repository: thanos/thanos
      tag: v0.34.1
    # NOTE(review): presumably ignored by the chart while
    # existingObjstoreSecret is set — confirm whether `insecure: true` must be
    # moved into the Secret's objstore.yml instead.
    objstoreConfig:
      type: s3
      config:
        insecure: true
    receive:
      enabled: false
      networkPolicy:
        enabled: false
    queryFrontend:
      enabled: true
      replicaCount: 2
      # Redis cache configuration, reused by storegateway through the alias.
      # `addr` is a comma-separated address list, so it must not end with a
      # trailing comma (that would add an empty address).
      config: &config |-
        type: REDIS
        config:
          addr: dragonfly.database.svc.cluster.local:6379
          db: 13
      ingress:
        enabled: true
        ingressClassName: internal-nginx
        hostname: &host thanos-query-frontend.jahanson.tech
        tls: true
        extraTls:
          - hosts:
              - *host
      networkPolicy:
        enabled: false
    query:
      enabled: true
      replicaCount: 2
      # Same label name as replicaExternalLabelName on the Prometheus side.
      replicaLabel: ["__replica__"]
      dnsDiscovery:
        sidecarsService: kube-prometheus-stack-thanos-discovery
        sidecarsNamespace: observability
      stores: ["thanos.jahanson.tech:10901"]
      networkPolicy:
        enabled: false
    bucketweb:
      enabled: true
      replicaCount: 2
      networkPolicy:
        enabled: false
    compactor:
      enabled: true
      extraFlags:
        - --compact.concurrency=4
        - --delete-delay=30m
      retentionResolutionRaw: 14d
      retentionResolution5m: 30d
      retentionResolution1h: 60d
      persistence:
        enabled: true
        storageClass: openebs-hostpath
        size: 10Gi
      networkPolicy:
        enabled: false
    storegateway:
      enabled: true
      replicaCount: 2
      config: *config
      persistence:
        enabled: true
        storageClass: openebs-hostpath
        size: 10Gi
      networkPolicy:
        enabled: false
    ruler:
      enabled: true
      replicaCount: 2
      replicaLabel: __replica__
      alertmanagers: ["http://alertmanager-operated.observability.svc.cluster.local:9093"]
      extraFlags: ["--web.prefix-header=X-Forwarded-Prefix"]
      # Alert when no `up` series exists for the Prometheus job.
      config: |-
        groups:
          - name: PrometheusWatcher
            rules:
              - alert: PrometheusDown
                annotations:
                  summary: A Prometheus has disappeared from Prometheus target discovery
                expr: absent(up{job="kube-prometheus-stack-prometheus"})
                for: 5m
                labels:
                  severity: critical
      persistence:
        enabled: true
        storageClass: openebs-hostpath
        size: 10Gi
      networkPolicy:
        enabled: false
    metrics:
      enabled: true
      serviceMonitor:
        enabled: true
|
|
@ -0,0 +1,7 @@
|
|||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Resources of the thanos app.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./externalsecret.yaml
  - ./helmrelease.yaml
|
0
kubernetes/apps/observability/thanos/ks.yaml
Normal file
0
kubernetes/apps/observability/thanos/ks.yaml
Normal file
|
@ -4,12 +4,7 @@ metadata:
|
|||
name: cluster-secrets
|
||||
namespace: flux-system
|
||||
stringData:
|
||||
SECRET_PUSHOVER_USERKEY: ENC[AES256_GCM,data:HknjiEQXIa1zntN4yOlTQ/buKx2xppiQV7faAxIe,iv:A9sMptT1QcgQvuP8jqPUZDjqTa56kbsLBjITQvPQyF8=,tag:Sa5PIweT7OYuoq5YG43rpA==,type:str]
|
||||
SECRET_PUSHOVER_ALERT_MANAGER_APIKEY: ENC[AES256_GCM,data:n0cFsAwCX1/y5HhsNxr/c2KT/5dzt55Ygi17rX+OV7cwKPKMImmLinb6GhD9fDIz1AINGBijXuXvD8TL,iv:4nwdHlSJEUSyMEDvh+5mhONXCGTJ3qyTITwG6CxeG3A=,tag:kurCrF2rGQFBF2u7Hhinuw==,type:str]
|
||||
SECRET_HEALTHCHECKS_WEBHOOK: ENC[AES256_GCM,data:YG8/g4i8inIQnCIsQyEkPdNyVmbFYU4bhixacOEEEcuJMl8ax8TH1yBRl5ziQmBggp/CETorWCmNiC3jkUXYYta/znlo76T5,iv:SGdg9htpyFP38jbAJDg+zq4Rs+axgM5m3SsgBG38Bu8=,tag:TTIVFki9e03rqVvNmtsFuw==,type:str]
|
||||
SECRET_CLOUDFLARE_ACCOUNT_ID: ENC[AES256_GCM,data:bKGSKh/TxNtCMRa83/i44fX7XC5mRxBLVeZ94UltjOo=,iv:Ji0tUnrvDywxMeCvNwBrG/a8JVudfK4sXYL8q0i/cz8=,tag:j4Bwvcz73RdIInsiz0F0JA==,type:str]
|
||||
SECRET_CLUSTER_CLOUDFLARE_TUNNEL_ID: ENC[AES256_GCM,data:bl9psiIxkDTchopNuPNxaGy7fQWJLdZwfnqTi8AOSl5cFMAZ,iv:CKYrQHv8fiHU4312Wfo6XlMofiR6uWP+AafO1n1y970=,tag:iyceSr/VUtE2cNbndkmV1g==,type:str]
|
||||
K8S_SERVICE_ENDPOINT: ENC[AES256_GCM,data:3s9EeJwFzDQ=,iv:a4oU9bf7ESscw6o9YqhBx8kRm/rL1l2ydjjd1ngn/P0=,tag:TAwJ2UmFuEHeHsEhfiVH9g==,type:str]
|
||||
CLUSTER_SECRET_CLOUDFLARE_ACCOUNT_ID: ENC[AES256_GCM,data:bQvXy9wHJcVKCa9xb89Ji2VSBmsxPKuEXIG/+KiclmM=,iv:63JdSorOBh2uz98ajzdtydSbJH3wKEaX5fRP3LX8g9Q=,tag:NH7Y6EoWaEGVal7E0XHg0w==,type:str]
|
||||
sops:
|
||||
kms: []
|
||||
gcp_kms: []
|
||||
|
@ -19,14 +14,14 @@ sops:
|
|||
- recipient: age1eqlaq205y5jre9hu5hvulywa7w3d4qyxwmafneamxcn7nejesedsf4q9g6
|
||||
enc: |
|
||||
-----BEGIN AGE ENCRYPTED FILE-----
|
||||
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBwSC9CNFkwMHVLd0dWb0Jq
|
||||
cnN0OUJzYVlYV2VRS3p2ek5UcHl4TXNQckhjCnlHQTVNNmdyZFF6RXhETlBzSW9v
|
||||
S00ra2k2Y0VyWnJjcU9oWG5XVGJDQkkKLS0tIHB2bGxDOWhWci81aGViVFlsL0JE
|
||||
ZGRUUFpKTXpjWW9HQ0R1VDk2RmVmQ2MKJwHW3q0vCZClJFfDrWSLw6C43vWVfyLr
|
||||
1ACvmNWml+xv/MOQwoRRMx6OVF74X83UyTFdVrXXk7SkzRcwQr4j+A==
|
||||
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBUdHVVdXUyMUlYc01Va25F
|
||||
aXg2YWVDdnQwQnRGMWE4SEJtUnNka216YkVRCks5SUJBMzIxY25PWXQzSlBybkdL
|
||||
Smwxc1hscTlNdzkzUWVPaXBYNkg5RWsKLS0tIGg0UHU3NGlpR1I5RjAvK1NvS3hl
|
||||
K3J3NTZHQlhIOEt6YnZ6QU5QZ0JLT3MKYyy736Q4oXmaryf+JLlgEoK64iGDlUDg
|
||||
JbdxbEfCPh3xbuTAff5oU0LxX9XVsoKBO/8+ew6+P/8bcjeb9sNCEg==
|
||||
-----END AGE ENCRYPTED FILE-----
|
||||
lastmodified: "2024-01-12T19:24:10Z"
|
||||
mac: ENC[AES256_GCM,data:EdmF3LFSmBFe6Vn5LzVmOb6tyOYto4iwIfJlUL50pjIobvw073oTwd99NkZ9m6aXB2no6ghgPc2RU8jOAtK9gg71kvLOGP45VZ07zLbcxsM8iEkSp2UX2k07/WavdXXGY4yBswGCZgnuPKah6uVNs1s8zEQNCkQQu0D1Ukf3SJY=,iv:7+sUShSrv6iwBJUgT03l38Wg9yX4G1LeXpGgHlOuMnE=,tag:rgXF0E/BIfeyYwnAYYJBsQ==,type:str]
|
||||
lastmodified: "2024-02-27T17:14:28Z"
|
||||
mac: ENC[AES256_GCM,data:0OKbP4/zLiMI7KU0WNXfZ62uVKTKBsJJux36ULEI2nd4AEpp57r7hH4DdAcUW9lCB6ZSvXMNytOM2T5GPHDOvEjrne0tv+jMbrp1daBCM08FUDsbjt0tl2veU43wz9KYWe2AlvmwOZPna614fQVFGtaeu79TRu938p2Gz/BnElc=,iv://gX/mf4C/TtTgUKOg6M7m1y6b2mDTk8PjR9Zwusl9c=,tag:uYapGpdIRJfL26kjw3a8Vw==,type:str]
|
||||
pgp: []
|
||||
encrypted_regex: ^(data|stringData)$
|
||||
version: 3.8.1
|
||||
|
|
Reference in a new issue