Compare commits

..

No commits in common. "0c6deac2c60067f78ff529f7287f527c8c807e66" and "2e2da1768fe4854d602da2003307d25f16957b02" have entirely different histories.

438 changed files with 15687 additions and 659 deletions

9
.ansible-lint Normal file
View file

@ -0,0 +1,9 @@
---
skip_list:
- yaml[line-length]
- var-naming
warn_list:
- command-instead-of-shell
- deprecated-command-syntax
- experimental
- no-changed-when

View file

@ -0,0 +1,52 @@
---
# yaml-language-server: $schema=https://taskfile.dev/schema.json
version: "3"
vars:
PYTHON_BIN: python3
env:
PATH: "{{.ROOT_DIR}}/.venv/bin:$PATH"
VIRTUAL_ENV: "{{.ROOT_DIR}}/.venv"
ANSIBLE_COLLECTIONS_PATH: "{{.ROOT_DIR}}/.venv/galaxy"
ANSIBLE_ROLES_PATH: "{{.ROOT_DIR}}/.venv/galaxy/ansible_roles"
ANSIBLE_VARS_ENABLED: "host_group_vars,community.sops.sops"
tasks:
deps:
desc: Set up Ansible dependencies for the environment
cmds:
- task: .venv
run:
desc: Run an Ansible playbook for configuring a cluster
summary: |
Args:
cluster: Cluster to run command against (required)
playbook: Playbook to run (required)
prompt: Run Ansible playbook '{{.playbook}}' against the '{{.cluster}}' cluster... continue?
deps: ["deps"]
cmd: |
.venv/bin/ansible-playbook \
--inventory {{.ANSIBLE_DIR}}/{{.cluster}}/inventory/hosts.yaml \
{{.ANSIBLE_DIR}}/{{.cluster}}/playbooks/{{.playbook}}.yaml {{.CLI_ARGS}}
preconditions:
- { msg: "Argument (cluster) is required", sh: "test -n {{.cluster}}" }
- { msg: "Argument (playbook) is required", sh: "test -n {{.playbook}}" }
- { msg: "Venv not found", sh: "test -d {{.ROOT_DIR}}/.venv" }
- { msg: "Inventory not found", sh: "test -f {{.ANSIBLE_DIR}}/{{.cluster}}/inventory/hosts.yaml" }
- { msg: "Playbook not found", sh: "test -f {{.ANSIBLE_DIR}}/{{.cluster}}/playbooks/{{.playbook}}.yaml" }
.venv:
internal: true
cmds:
- true && {{.PYTHON_BIN}} -m venv {{.ROOT_DIR}}/.venv
- .venv/bin/python3 -m pip install --upgrade pip setuptools wheel
- .venv/bin/python3 -m pip install --upgrade --requirement {{.ANSIBLE_DIR}}/requirements.txt
- .venv/bin/ansible-galaxy install --role-file "{{.ANSIBLE_DIR}}/requirements.yaml" --force
sources:
- "{{.ANSIBLE_DIR}}/requirements.txt"
- "{{.ANSIBLE_DIR}}/requirements.yaml"
generates:
- "{{.ROOT_DIR}}/.venv/pyvenv.cfg"

View file

@ -0,0 +1,104 @@
---
version: "3"
x-task-vars: &task-vars
node: "{{.node}}"
ceph_disk: "{{.ceph_disk}}"
ts: "{{.ts}}"
jobName: "{{.jobName}}"
vars:
waitForJobScript: "../_scripts/wait-for-k8s-job.sh"
ts: '{{now | date "150405"}}'
tasks:
wipe-node-aule:
desc: Trigger a wipe of Rook-Ceph data on node "aule"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460833"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: aule
wipe-node-orome:
desc: Trigger a wipe of Rook-Ceph data on node "orome"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37645333"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: orome
wipe-node-eonwe:
desc: Trigger a wipe of Rook-Ceph data on node "eonwe"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460887"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: eonwe
wipe-node-arlen:
desc: Trigger a wipe of Rook-Ceph data on node "arlen"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460897"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: arlen
wipe-disk:
desc: Wipe all remnants of rook-ceph from a given disk (ex. task rook:wipe-disk node=aule ceph_disk="/dev/nvme0n1")
silent: true
internal: true
cmds:
- envsubst < <(cat {{.wipeRookDiskJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} {{.wipeCephDiskJobName}} default
- kubectl -n default wait job/{{.wipeCephDiskJobName}} --for condition=complete --timeout=1m
- kubectl -n default logs job/{{.wipeCephDiskJobName}} --container list
- kubectl -n default delete job {{.wipeCephDiskJobName}}
vars:
node: '{{ or .node (fail "`node` is required") }}'
ceph_disk: '{{ or .ceph_disk (fail "`ceph_disk` is required") }}'
jobName: 'wipe-disk-{{- .node -}}-{{- .ceph_disk | replace "/" "-" -}}-{{- .ts -}}'
wipeRookDiskJobTemplate: "WipeDiskJob.tmpl.yaml"
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.wipeRookDiskJobTemplate}}
wipe-data:
desc: Wipe all remnants of rook-ceph from a given disk (ex. task rook:wipe-data node=aule)
silent: true
internal: true
cmds:
- envsubst < <(cat {{.wipeRookDataJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} {{.wipeRookDataJobName}} default
- kubectl -n default wait job/{{.wipeRookDataJobName}} --for condition=complete --timeout=1m
- kubectl -n default logs job/{{.wipeRookDataJobName}} --container list
- kubectl -n default delete job {{.wipeRookDataJobName}}
vars:
node: '{{ or .node (fail "`node` is required") }}'
jobName: "wipe-rook-data-{{- .node -}}-{{- .ts -}}"
wipeRookDataJobTemplate: "WipeRookDataJob.tmpl.yaml"
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.wipeRookDataJobTemplate}}

View file

@ -0,0 +1,26 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "${jobName}"
namespace: "default"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: Never
nodeName: ${node}
containers:
- name: disk-wipe
image: docker.io/library/alpine:3.20.0
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted;
sgdisk --zap-all ${ceph_disk};
blkdiscard ${ceph_disk};
dd if=/dev/zero bs=1M count=10000 oflag=direct of=${ceph_disk};
partprobe ${ceph_disk};

View file

@ -0,0 +1,29 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "${jobName}"
namespace: "default"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: Never
nodeName: ${node}
containers:
- name: disk-wipe
image: docker.io/library/alpine:3.20.0
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- rm -rf /mnt/host_var/lib/rook
volumeMounts:
- mountPath: /mnt/host_var
name: host-var
volumes:
- name: host-var
hostPath:
path: /var

View file

@ -0,0 +1,19 @@
apiVersion: v1
kind: Pod
metadata:
name: my-pod
spec:
containers:
- name: disk-wipe
image: docker.io/library/alpine:3.20.0
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted e2fsprogs;
sgdisk --zap-all /dev/nvme1n1;
blkdiscard /dev/nvme1n1;
dd if=/dev/zero bs=1M count=10000 oflag=direct of=/dev/nvme1n1;
sgdisk /dev/nvme1n1
partprobe /dev/nvme1n1;

View file

@ -0,0 +1,116 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: jellyfin
namespace: default
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
jellyfin:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: jellyfin/jellyfin
tag: 10.8.13
env:
NVIDIA_VISIBLE_DEVICES: "all"
NVIDIA_DRIVER_CAPABILITIES: "compute,video,utility"
DOTNET_SYSTEM_IO_DISABLEFILELOCKING: "true"
JELLYFIN_FFmpeg__probesize: 50000000
JELLYFIN_FFmpeg__analyzeduration: 50000000
JELLYFIN_PublishedServerUrl: jelly.hsn.dev
TZ: America/Chicago
probes:
liveness: &probes
enabled: true
custom: true
spec:
httpGet:
path: /health
port: &port 8096
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
readiness: *probes
startup:
enabled: false
resources:
requests:
nvidia.com/gpu: 1 # requesting 1 GPU
cpu: 100m
memory: 512Mi
limits:
nvidia.com/gpu: 1
memory: 4Gi
pod:
runtimeClassName: nvidia
enableServiceLinks: false
nodeSelector:
nvidia.com/gpu.present: "true"
securityContext:
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups: [44, 105, 10000]
service:
app:
controller: jellyfin
ports:
http:
port: *port
ingress:
app:
enabled: true
className: external-nginx
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
external-dns.alpha.kubernetes.io/target: external.hsn.dev
hosts:
- host: &host "jelly.hsn.dev"
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts:
- *host
persistence:
config:
existingClaim: jellyfin
enabled: true
transcode:
type: emptyDir
globalMounts:
- path: /transcode
media:
enabled: true
type: nfs
server: 10.1.1.12
path: /mnt/users/Media
globalMounts:
- path: /media

View file

@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: default
resources:
- ./helmrelease.yaml
- ../../../../templates/volsync

View file

@ -0,0 +1,23 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app jellyfin
namespace: flux-system
spec:
dependsOn:
- name: external-secrets-stores
path: ./kubernetes/apps/default/jellyfin/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 10Gi

View file

@ -0,0 +1,26 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: home-assistant
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: home-assistant-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
HASS_ELEVATION: "{{ .hass_elevation }}"
HASS_LATITUDE: "{{ .hass_latitude }}"
HASS_LONGITUDE: "{{ .hass_longitude }}"
dataFrom:
- extract:
key: home-assistant
rewrite:
- regexp:
source: "(.*)"
target: "hass_$1"

View file

@ -0,0 +1,90 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: home-assistant
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
controllers:
home-assistant:
annotations:
reloader.stakater.com/auto: "true"
pod:
annotations:
k8s.v1.cni.cncf.io/networks: |
[{
"name":"multus-iot",
"namespace": "kube-system",
"ips": ["10.1.3.151/24"]
}]
securityContext:
runAsUser: 568
runAsGroup: 568
runAsNonRoot: true
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
containers:
app:
image:
repository: ghcr.io/home-assistant/home-assistant
tag: 2024.5.5
env:
TZ: America/Chicago
HASS_HTTP_TRUSTED_PROXY_1: 10.244.0.0/16
envFrom:
- secretRef:
name: home-assistant-secret
resources:
requests:
cpu: 10m
limits:
memory: 1Gi
service:
app:
controller: home-assistant
ports:
http:
port: 8123
ingress:
app:
className: internal-nginx
hosts:
- host: &host hass.jahanson.tech
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts: [*host]
persistence:
config:
existingClaim: home-assistant
logs:
type: emptyDir
globalMounts:
- path: /config/logs
tts:
type: emptyDir
globalMounts:
- path: /config/tts
tmp:
type: emptyDir

View file

@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml
- ../../../../templates/volsync

View file

@ -0,0 +1,29 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app home-assistant
namespace: flux-system
spec:
targetNamespace: home-automation
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: openebs-system
- name: volsync
path: ./kubernetes/apps/home-automation/home-assistant/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 5Gi

View file

@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Pre Flux-Kustomizations
- ./namespace.yaml
# Flux-Kustomizations
- ./mosquitto/ks.yaml

View file

@ -0,0 +1,107 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: &app matter-server
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 3.2.1
interval: 15m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
maxHistory: 3
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
controllers:
matter-server:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
pod:
annotations:
k8s.v1.cni.cncf.io/networks: |
[{
"name":"multus-iot",
"namespace": "kube-system",
"ips": ["10.1.3.152/24"]
}]
securityContext:
runAsUser: 568
runAsGroup: 568
runAsNonRoot: true
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
containers:
app:
image:
repository: ghcr.io/home-assistant-libs/python-matter-server
tag: 6.0.1
pullPolicy: IfNotPresent
env:
TZ: "America/Chicago"
MATTER_SERVER__INSTANCE_NAME: Matter-Server
MATTER_SERVER__PORT: &port 5580
MATTER_SERVER__APPLICATION_URL: &host matter.jahanson.tech
MATTER_SERVER__LOG_LEVEL: info
probes:
liveness:
enabled: true
readiness:
enabled: true
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
resources:
requests:
memory: "100M"
limits:
memory: "500M"
service:
app:
controller: *app
type: LoadBalancer
annotations:
io.cilium/lb-ipam-ips: "10.1.1.37"
ports:
api:
enabled: true
primary: true
protocol: TCP
port: *port
externalTrafficPolicy: Cluster
persistence:
config:
enabled: true
existingClaim: matter-server
advancedMounts:
matter-server:
app:
- path: "/data"
ingress:
app:
className: internal-nginx
hosts:
- host: *host
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts: [*host]

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
- ../../../../templates/volsync

View file

@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app matter-server
namespace: flux-system
spec:
targetNamespace: home-automation
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: openebs-system
- name: volsync
path: ./kubernetes/apps/home-automation/matter-server/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 1Gi

View file

@ -0,0 +1,9 @@
per_listener_settings false
listener 1883
allow_anonymous false
persistence true
persistence_location /data
autosave_interval 1800
connection_messages false
autosave_interval 60
password_file /mosquitto/external_config/mosquitto_pwd

View file

@ -0,0 +1,27 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: mosquitto
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: mosquitto-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
mosquitto_pwd: |
{{ .mosquitto_username }}:{{ .mosquitto_password }}
{{ .mosquitto_zwave_username }}:{{ .mosquitto_zwave_password }}
{{ .mosquitto_home_assistant_username }}:{{ .mosquitto_home_assistant_password }}
dataFrom:
- extract:
key: mosquitto
rewrite:
- regexp:
source: "(.*)"
target: "mosquitto_$1"

View file

@ -0,0 +1,105 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app mosquitto
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.2.1
interval: 30m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
values:
controllers:
mosquitto:
annotations:
reloader.stakater.com/auto: "true"
pod:
securityContext:
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
initContainers:
init-config:
image:
repository: public.ecr.aws/docker/library/eclipse-mosquitto
tag: 2.0.18
command:
- "/bin/sh"
- "-c"
args:
- cp /tmp/secret/* /mosquitto/external_config/;
mosquitto_passwd -U /mosquitto/external_config/mosquitto_pwd;
chmod 0600 /mosquitto/external_config/mosquitto_pwd;
containers:
app:
image:
repository: public.ecr.aws/docker/library/eclipse-mosquitto
tag: 2.0.18
probes:
liveness:
enabled: true
readiness:
enabled: true
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
resources:
requests:
cpu: 5m
memory: 10M
limits:
memory: 10M
service:
app:
controller: mosquitto
type: LoadBalancer
annotations:
external-dns.alpha.kubernetes.io/hostname: "mqtt.jahanson.tech"
io.cilium/lb-ipam-ips: "10.1.1.36"
externalTrafficPolicy: Local
ports:
mqtt:
enabled: true
port: 1883
persistence:
data:
existingClaim: *app
advancedMounts:
mosquitto:
app:
- path: /data
mosquitto-configfile:
type: configMap
name: mosquitto-configmap
advancedMounts:
mosquitto:
app:
- path: /mosquitto/config/mosquitto.conf
subPath: mosquitto.conf
mosquitto-secret:
type: secret
name: mosquitto-secret
advancedMounts:
mosquitto:
init-config:
- path: /tmp/secret
mosquitto-externalconfig:
type: emptyDir
globalMounts:
- path: /mosquitto/external_config

View file

@ -0,0 +1,14 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
- ./externalsecret.yaml
- ../../../../templates/volsync
configMapGenerator:
- name: mosquitto-configmap
files:
- config/mosquitto.conf
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &appname mosquitto
namespace: flux-system
spec:
targetNamespace: home-automation
commonMetadata:
labels:
app.kubernetes.io/name: *appname
interval: 10m
path: "./kubernetes/apps/home-automation/mosquitto/app"
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: true
dependsOn:
- name: openebs
- name: volsync
- name: external-secrets-stores
postBuild:
substitute:
APP: *appname
VOLSYNC_CLAIM: mosquitto-data
VOLSYNC_CAPACITY: 512Mi

View file

@ -0,0 +1,8 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: home-automation
labels:
kustomize.toolkit.fluxcd.io/prune: disabled
volsync.backube/privileged-movers: "true"

View file

@ -0,0 +1,588 @@
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.14.0
creationTimestamp: null
name: ciliumbgppeeringpolicies.cilium.io
spec:
group: cilium.io
names:
categories:
- cilium
- ciliumbgp
kind: CiliumBGPPeeringPolicy
listKind: CiliumBGPPeeringPolicyList
plural: ciliumbgppeeringpolicies
shortNames:
- bgpp
singular: ciliumbgppeeringpolicy
scope: Cluster
versions:
- additionalPrinterColumns:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v2alpha1
schema:
openAPIV3Schema:
description: CiliumBGPPeeringPolicy is a Kubernetes third-party resource for
instructing Cilium's BGP control plane to create virtual BGP routers.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Spec is a human readable description of a BGP peering policy
properties:
nodeSelector:
description: "NodeSelector selects a group of nodes where this BGP
Peering Policy applies. \n If empty / nil this policy applies to
all nodes."
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the value from the
MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
virtualRouters:
description: A list of CiliumBGPVirtualRouter(s) which instructs the
BGP control plane how to instantiate virtual BGP routers.
items:
description: CiliumBGPVirtualRouter defines a discrete BGP virtual
router configuration.
properties:
exportPodCIDR:
default: false
description: ExportPodCIDR determines whether to export the
Node's private CIDR block to the configured neighbors.
type: boolean
localASN:
description: LocalASN is the ASN of this virtual router. Supports
extended 32bit ASNs
format: int64
maximum: 4294967295
minimum: 0
type: integer
neighbors:
description: Neighbors is a list of neighboring BGP peers for
this virtual router
items:
description: CiliumBGPNeighbor is a neighboring peer for use
in a CiliumBGPVirtualRouter configuration.
properties:
advertisedPathAttributes:
description: AdvertisedPathAttributes can be used to apply
additional path attributes to selected routes when advertising
them to the peer. If empty / nil, no additional path
attributes are advertised.
items:
description: CiliumBGPPathAttributes can be used to
apply additional path attributes to matched routes
when advertising them to a BGP peer.
properties:
communities:
description: Communities defines a set of community
values advertised in the supported BGP Communities
path attributes. If nil / not set, no BGP Communities
path attribute will be advertised.
properties:
large:
description: Large holds a list of the BGP Large
Communities Attribute (RFC 8092) values.
items:
description: BGPLargeCommunity type represents
a value of the BGP Large Communities Attribute
(RFC 8092), as three 4-byte decimal numbers
separated by colons.
pattern: ^([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5]):([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5]):([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5])$
type: string
type: array
standard:
description: Standard holds a list of "standard"
32-bit BGP Communities Attribute (RFC 1997)
values defined as numeric values.
items:
description: BGPStandardCommunity type represents
a value of the "standard" 32-bit BGP Communities
Attribute (RFC 1997) as a 4-byte decimal
number or two 2-byte decimal numbers separated
by a colon (<0-65535>:<0-65535>). For example,
no-export community value is 65553:65281.
pattern: ^([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5])$|^([0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]):([0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$
type: string
type: array
wellKnown:
description: WellKnown holds a list "standard"
32-bit BGP Communities Attribute (RFC 1997)
values defined as well-known string aliases
to their numeric values.
items:
description: "BGPWellKnownCommunity type represents
a value of the \"standard\" 32-bit BGP Communities
Attribute (RFC 1997) as a well-known string
alias to its numeric value. Allowed values
and their mapping to the numeric values:
\n internet = 0x00000000
(0:0) planned-shut = 0xffff0000
(65535:0) accept-own = 0xffff0001
(65535:1) route-filter-translated-v4 = 0xffff0002
(65535:2) route-filter-v4 = 0xffff0003
(65535:3) route-filter-translated-v6 = 0xffff0004
(65535:4) route-filter-v6 = 0xffff0005
(65535:5) llgr-stale = 0xffff0006
(65535:6) no-llgr = 0xffff0007
(65535:7) blackhole = 0xffff029a
(65535:666) no-export =
0xffffff01\t(65535:65281) no-advertise =
0xffffff02 (65535:65282) no-export-subconfed
\ = 0xffffff03 (65535:65283) no-peer
\ = 0xffffff04 (65535:65284)"
enum:
- internet
- planned-shut
- accept-own
- route-filter-translated-v4
- route-filter-v4
- route-filter-translated-v6
- route-filter-v6
- llgr-stale
- no-llgr
- blackhole
- no-export
- no-advertise
- no-export-subconfed
- no-peer
type: string
type: array
type: object
localPreference:
description: LocalPreference defines the preference
value advertised in the BGP Local Preference path
attribute. As Local Preference is only valid for
iBGP peers, this value will be ignored for eBGP
peers (no Local Preference path attribute will
be advertised). If nil / not set, the default
Local Preference of 100 will be advertised in
the Local Preference path attribute for iBGP peers.
format: int64
maximum: 4294967295
minimum: 0
type: integer
selector:
description: Selector selects a group of objects
of the SelectorType resulting into routes that
will be announced with the configured Attributes.
If nil / not set, all objects of the SelectorType
are selected.
properties:
matchExpressions:
description: matchExpressions is a list of label
selector requirements. The requirements are
ANDed.
items:
description: A label selector requirement
is a selector that contains values, a key,
and an operator that relates the key and
values.
properties:
key:
description: key is the label key that
the selector applies to.
type: string
operator:
description: operator represents a key's
relationship to a set of values. Valid
operators are In, NotIn, Exists and
DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string
values. If the operator is In or NotIn,
the values array must be non-empty.
If the operator is Exists or DoesNotExist,
the values array must be empty. This
array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the
value from the MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value}
pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions,
whose key field is "key", the operator is
"In", and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
selectorType:
description: 'SelectorType defines the object type
on which the Selector applies: - For "PodCIDR"
the Selector matches k8s CiliumNode resources
(path attributes apply to routes announced for
PodCIDRs of selected CiliumNodes. Only affects
routes of cluster scope / Kubernetes IPAM CIDRs,
not Multi-Pool IPAM CIDRs. - For "CiliumLoadBalancerIPPool"
the Selector matches CiliumLoadBalancerIPPool
custom resources (path attributes apply to routes
announced for selected CiliumLoadBalancerIPPools).
- For "CiliumPodIPPool" the Selector matches CiliumPodIPPool
custom resources (path attributes apply to routes
announced for allocated CIDRs of selected CiliumPodIPPools).'
enum:
- PodCIDR
- CiliumLoadBalancerIPPool
- CiliumPodIPPool
type: string
required:
- selectorType
type: object
type: array
authSecretRef:
description: AuthSecretRef is the name of the secret to
use to fetch a TCP authentication password for this
peer.
type: string
connectRetryTimeSeconds:
default: 120
description: ConnectRetryTimeSeconds defines the initial
value for the BGP ConnectRetryTimer (RFC 4271, Section
8).
format: int32
maximum: 2147483647
minimum: 1
type: integer
eBGPMultihopTTL:
default: 1
description: EBGPMultihopTTL controls the multi-hop feature
for eBGP peers. Its value defines the Time To Live (TTL)
value used in BGP packets sent to the neighbor. The
value 1 implies that eBGP multi-hop feature is disabled
(only a single hop is allowed). This field is ignored
for iBGP peers.
format: int32
maximum: 255
minimum: 1
type: integer
families:
description: "Families, if provided, defines a set of
AFI/SAFIs the speaker will negotiate with it's peer.
\n If this slice is not provided the default families
of IPv6 and IPv4 will be provided."
items:
description: CiliumBGPFamily represents a AFI/SAFI address
family pair.
properties:
afi:
description: Afi is the Address Family Identifier
(AFI) of the family.
enum:
- ipv4
- ipv6
- l2vpn
- ls
- opaque
type: string
safi:
description: Safi is the Subsequent Address Family
Identifier (SAFI) of the family.
enum:
- unicast
- multicast
- mpls_label
- encapsulation
- vpls
- evpn
- ls
- sr_policy
- mup
- mpls_vpn
- mpls_vpn_multicast
- route_target_constraints
- flowspec_unicast
- flowspec_vpn
- key_value
type: string
required:
- afi
- safi
type: object
type: array
gracefulRestart:
description: GracefulRestart defines graceful restart
parameters which are negotiated with this neighbor.
If empty / nil, the graceful restart capability is disabled.
properties:
enabled:
description: Enabled flag, when set enables graceful
restart capability.
type: boolean
restartTimeSeconds:
default: 120
description: RestartTimeSeconds is the estimated time
it will take for the BGP session to be re-established
with peer after a restart. After this period, peer
will remove stale routes. This is described RFC
4724 section 4.2.
format: int32
maximum: 4095
minimum: 1
type: integer
required:
- enabled
type: object
holdTimeSeconds:
default: 90
description: HoldTimeSeconds defines the initial value
for the BGP HoldTimer (RFC 4271, Section 4.2). Updating
this value will cause a session reset.
format: int32
maximum: 65535
minimum: 3
type: integer
keepAliveTimeSeconds:
default: 30
description: KeepaliveTimeSeconds defines the initial
value for the BGP KeepaliveTimer (RFC 4271, Section
8). It can not be larger than HoldTimeSeconds. Updating
this value will cause a session reset.
format: int32
maximum: 65535
minimum: 1
type: integer
peerASN:
description: PeerASN is the ASN of the peer BGP router.
Supports extended 32bit ASNs
format: int64
maximum: 4294967295
minimum: 0
type: integer
peerAddress:
description: PeerAddress is the IP address of the peer.
This must be in CIDR notation and use a /32 to express
a single host.
format: cidr
type: string
peerPort:
default: 179
description: PeerPort is the TCP port of the peer. 1-65535
is the range of valid port numbers that can be specified.
If unset, defaults to 179.
format: int32
maximum: 65535
minimum: 1
type: integer
required:
- peerASN
- peerAddress
type: object
minItems: 1
type: array
podIPPoolSelector:
description: "PodIPPoolSelector selects CiliumPodIPPools based
on labels. The virtual router will announce allocated CIDRs
of matching CiliumPodIPPools. \n If empty / nil no CiliumPodIPPools
will be announced."
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the value from
the MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
serviceSelector:
description: "ServiceSelector selects a group of load balancer
services which this virtual router will announce. The loadBalancerClass
for a service must be nil or specify a class supported by
Cilium, e.g. \"io.cilium/bgp-control-plane\". Refer to the
following document for additional details regarding load balancer
classes: \n https://kubernetes.io/docs/concepts/services-networking/service/#load-balancer-class
\n If empty / nil no services will be announced."
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the value from
the MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
required:
- localASN
- neighbors
type: object
minItems: 1
type: array
required:
- virtualRouters
type: object
required:
- metadata
type: object
served: true
storage: true
subresources: {}
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

View file

@ -0,0 +1,36 @@
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeeringPolicy
# comments courtesy of JJGadgets
# MAKE SURE CRDs ARE INSTALLED IN CLUSTER VIA cilium-config ConfigMap OR Cilium HelmRelease/values.yaml (bgpControlPlane.enabled: true), BEFORE THIS IS APPLIED!
# "CiliumBGPPeeringPolicy" Custom Resource will replace the old MetalLB BGP's "bgp-config" ConfigMap
# "CiliumBGPPeeringPolicy" is used with `bgpControlPlane.enabled: true` which uses GoBGP, NOT the old `bgp.enabled: true` which uses MetalLB
metadata:
name: bgp-loadbalancer-ip-main
spec:
nodeSelector:
matchLabels:
kubernetes.io/os: "linux" # match all Linux nodes, change this to match more granularly if more than 1 PeeringPolicy is to be used throughout cluster
virtualRouters:
- localASN: 64512
exportPodCIDR: false
serviceSelector: # this replaces address-pools, instead of defining the range of IPs that can be assigned to LoadBalancer services, now services have to match below selectors for their LB IPs to be announced
matchExpressions:
- {
key: thisFakeSelector,
operator: NotIn,
values: ["will-match-and-announce-all-services"],
}
neighbors:
- peerAddress: "10.1.1.1/32" # unlike bgp-config ConfigMap, peerAddress needs to be in CIDR notation
peerASN: 64512
---
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumloadbalancerippool_v2alpha1.json
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: main-pool
spec:
cidrs:
- cidr: 10.45.0.1/24

View file

@ -0,0 +1,78 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: cilium
namespace: kube-system
spec:
interval: 30m
chart:
spec:
chart: cilium
version: 1.15.3
sourceRef:
kind: HelmRepository
name: cilium
namespace: flux-system
maxHistory: 2
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
values:
cluster:
name: homelab
id: 1
hubble:
relay:
enabled: true
ui:
enabled: true
metrics:
enableOpenMetrics: true
prometheus:
enabled: true
operator:
prometheus:
enabled: true
ipam:
mode: kubernetes
kubeProxyReplacement: true
k8sServiceHost: 127.0.0.1
k8sServicePort: 7445
rollOutCiliumPods: true
cgroup:
automount:
enabled: false
hostRoot: /sys/fs/cgroup
bgp:
enabled: false
announce:
loadbalancerIP: true
podCIDR: false
bgpControlPlane:
enabled: true
securityContext:
capabilities:
ciliumAgent:
- CHOWN
- KILL
- NET_ADMIN
- NET_RAW
- IPC_LOCK
- SYS_ADMIN
- SYS_RESOURCE
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
cleanCiliumState:
- NET_ADMIN
- SYS_ADMIN
- SYS_RESOURCE

View file

@ -0,0 +1,23 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: allow-ssh
spec:
description: ""
nodeSelector:
matchLabels:
# node-access: ssh
node-role.kubernetes.io/control-plane: "true"
ingress:
- fromEntities:
- cluster
- toPorts:
- ports:
- port: "22"
protocol: TCP
- icmps:
- fields:
- type: 8
family: IPv4

View file

@ -0,0 +1,27 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: api-server
spec:
nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# load balancer -> api server
- fromCIDR:
- 167.235.217.82/32
toPorts:
- ports:
- port: '6443'
protocol: TCP
egress:
# api server -> kubelet
- toEntities:
- remote-node
toPorts:
- ports:
- port: '10250'
protocol: TCP

View file

@ -0,0 +1,41 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: cilium-health
specs:
- endpointSelector:
# apply to health endpoints
matchLabels:
reserved:health: ''
ingress:
# cilium agent -> cilium agent
- fromEntities:
- host
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP
- nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# cilium agent -> cilium agent
- fromEntities:
- health
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP
egress:
# cilium agent -> cilium agent
- toEntities:
- health
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP

View file

@ -0,0 +1,26 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: cilium-vxlan
spec:
nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# node -> vxlan
- fromEntities:
- remote-node
toPorts:
- ports:
- port: '8472'
protocol: UDP
egress:
# node -> vxlan
- toEntities:
- remote-node
toPorts:
- ports:
- port: '8472'
protocol: UDP

View file

@ -0,0 +1,65 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: core-dns
namespace: kube-system
specs:
- nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# core dns -> api server
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '6443'
protocol: TCP
- nodeSelector:
# apply to all nodes
matchLabels: {}
egress:
# kubelet -> core dns probes
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '8080'
protocol: TCP
- port: '8181'
protocol: TCP
- endpointSelector:
# apply to core dns pods
matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
ingress:
# kubelet -> core dns probes
- fromEntities:
- host
toPorts:
- ports:
- port: '8080'
protocol: TCP
- port: '8181'
protocol: TCP
egress:
# core dns -> api server
- toEntities:
- kube-apiserver
toPorts:
- ports:
- port: '6443'
protocol: TCP
# core dns -> upstream DNS
- toCIDR:
- 185.12.64.1/32
- 185.12.64.2/32
toPorts:
- ports:
- port: '53'
protocol: UDP

View file

@ -0,0 +1,27 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: etcd
spec:
nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# etcd peer -> etcd peer
- fromEntities:
- remote-node
toPorts:
- ports:
- port: '2380'
protocol: TCP
egress:
# etcd peer -> etcd peer
- toEntities:
- remote-node
toPorts:
- ports:
- port: '2380'
protocol: TCP

View file

@ -0,0 +1,15 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: "cilium.io/v2"
kind: CiliumClusterwideNetworkPolicy
metadata:
name: allow-specific-traffic
spec:
endpointSelector: {}
ingress:
- fromEntities:
- host
toPorts:
- ports:
- port: '6443'
protocol: TCP

View file

@ -0,0 +1,50 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: hubble-relay
namespace: kube-system
specs:
- nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# hubble relay -> hubble agent
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4244'
protocol: TCP
egress:
# kubelet -> hubble relay probes
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4245'
protocol: TCP
- endpointSelector:
# apply to hubble relay pods
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
ingress:
# kubelet -> hubble relay probes
- fromEntities:
- host
toPorts:
- ports:
- port: '4245'
protocol: TCP
egress:
# hubble relay -> hubble agent
- toEntities:
- host
- remote-node
toPorts:
- ports:
- port: '4244'
protocol: TCP

View file

@ -0,0 +1,75 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: hubble-ui
namespace: kube-system
specs:
- nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: ''
ingress:
# hubble ui -> api server
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
toPorts:
- ports:
- port: '6443'
protocol: TCP
- endpointSelector:
# apply to core dns endpoints
matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
ingress:
# hubble ui -> core dns
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
toPorts:
- ports:
- port: '53'
protocol: UDP
- endpointSelector:
# apply to hubble relay endpoints
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
ingress:
# hubble ui -> hubble relay
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
toPorts:
- ports:
- port: '4245'
protocol: TCP
- endpointSelector:
# apply to hubble ui endpoints
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
egress:
# hubble ui -> api server
- toEntities:
- kube-apiserver
toPorts:
- ports:
- port: '6443'
protocol: TCP
# hubble ui -> hubble relay
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4245'
protocol: TCP
# hubble ui -> core dns
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '53'
protocol: UDP

View file

@ -0,0 +1,28 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: kubelet
spec:
nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# api server -> kubelet
- fromEntities:
- kube-apiserver
toPorts:
- ports:
- port: '10250'
protocol: TCP
egress:
# kubelet -> load balancer
- toCIDR:
- 167.235.217.82/32
toEntities:
- host
toPorts:
- ports:
- port: '6443'
protocol: TCP

View file

@ -0,0 +1,16 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
resources:
- ./allow-ssh.yaml
- ./apiserver.yaml
- ./cilium-health.yaml
- ./cilium-vxlan.yaml
- ./core-dns.yaml
- ./etcd.yaml
- ./hubble-relay.yaml
- ./hubble-ui.yaml
- ./kubelet.yaml

View file

@ -0,0 +1,17 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cilium
namespace: flux-system
spec:
interval: 30m
retryInterval: 1m
timeout: 5m
path: "./kubernetes/apps/kube-system/cilium/app"
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false

View file

@ -7,6 +7,6 @@ resources:
configMapGenerator:
- name: spegel-helm-values
files:
- values.yaml=./helm-values.yml
- values.yaml=./resources/values.yml
configurations:
- kustomizeconfig.yaml

View file

@ -0,0 +1,17 @@
---
spegel:
containerdSock: /run/containerd/containerd.sock
containerdRegistryConfigPath: /etc/cri/conf.d/hosts
registries:
- https://docker.io
- https://ghcr.io
- https://quay.io
- https://mcr.microsoft.com
- https://public.ecr.aws
- https://gcr.io
- https://registry.k8s.io
- https://k8s.gcr.io
- https://lscr.io
service:
registry:
hostPort: 29999

View file

@ -14,7 +14,7 @@ spec:
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m

View file

@ -0,0 +1,109 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app zfs-scrub
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.2.1
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
kubanetics:
type: cronjob
cronjob:
schedule: "@weekly"
parallelism: 1 # Set to my total number of nodes
containers:
app:
image:
repository: ghcr.io/aarnaud/talos-debug-tools
tag: latest-6.6.29
command: ["/bin/bash", "-c"]
args:
- |
# Trim filesystems
chroot /host /usr/local/sbin/zpool scrub nahar
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
requests:
cpu: 25m
limits:
memory: 128Mi
securityContext:
privileged: true
pod:
hostNetwork: true
hostPID: true
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: *app
persistence:
netfs:
type: hostPath
hostPath: /sys
hostPathType: Directory
globalMounts:
- path: /sys
readOnly: true
dev:
type: hostPath
hostPath: /dev
hostPathType: Directory
globalMounts:
- path: /dev
modules:
type: hostPath
hostPath: /lib/modules
hostPathType: ""
globalMounts:
- path: /lib/modules
udev:
type: hostPath
hostPath: /run/udev
hostPathType: ""
globalMounts:
- path: /run/udev
localtime:
type: hostPath
hostPath: /etc/localtime
hostPathType: ""
globalMounts:
- path: /etc/localtime
host:
type: hostPath
hostPath: /
hostPathType: Directory
globalMounts:
- path: /host
efivars:
type: hostPath
hostPath: /sys/firmware/efi/efivars
hostPathType: ""
globalMounts:
- path: /sys/firmware/efi/efivars

View file

@ -0,0 +1,12 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
configMapGenerator:
- name: zfs-scrub-configmap
files:
- zfs-scrub.sh=./resources/zfs-scrub.sh
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,20 @@
#!/usr/bin/env bash
KUBELET_BIN="/usr/local/bin/kubelet"
KUBELET_PID="$(pgrep -f $KUBELET_BIN)"
ZPOOL="nahar"
if [ -z "${KUBELET_PID}" ]; then
echo "kubelet not found"
exit 1
fi
# Enter namespaces and run commands
nsrun() {
nsenter \
--mount="/host/proc/${KUBELET_PID}/ns/mnt" \
--net="/host/proc/${KUBELET_PID}/ns/net" \
-- bash -c "$1"
}
# Scrub filesystems
nsrun "zpool scrub ${ZPOOL}"

View file

@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app zfs-scrub
namespace: flux-system
spec:
targetNamespace: kube-system
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/kube-system/zfs-scrub/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m

View file

@ -0,0 +1,16 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
name: immich-app-config
labels:
app.kubernetes.io/name: immich
data:
LOG_LEVEL: verbose
DB_VECTOR_EXTENSION: pgvector
NODE_ENV: production
REDIS_HOSTNAME: dragonfly.database.svc.cluster.local
REDIS_PORT: "6379"
IMMICH_WEB_URL: http://immich-web.media.svc.cluster.local:3000
IMMICH_SERVER_URL: http://immich-server.media.svc.cluster.local:3001
IMMICH_MACHINE_LEARNING_URL: http://immich-machine-learning.media.svc.cluster.local:3003

View file

@ -0,0 +1,19 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: immich
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: immich-secret
template:
engineVersion: v2
data:
DATABASE_URI: "postgresql://{{ .DATABASE_USER }}:{{ .DATABASE_PASSWORD }}@immich-primary-real.media.svc:{{ .DATABASE_PORT }}/{{ .DATABASE_NAME }}"
dataFrom:
- extract:
key: immich

View file

@ -0,0 +1,21 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
name: immich-postgres-gatus-ep
labels:
gatus.io/enabled: "true"
data:
config.yaml: |
endpoints:
- name: immich-postgres
group: infrastructure
url: tcp://immich-primary-real.media.svc.cluster.local:5432
interval: 1m
ui:
hide-url: true
hide-hostname: true
conditions:
- "[CONNECTED] == true"
alerts:
- type: pushover

View file

@ -0,0 +1,97 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: &name immich
namespace: default
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
immich-server:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: ghcr.io/immich-app/immich-server
tag: v1.105.1
command: /bin/sh
args:
- ./start-server.sh
probes:
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
liveness:
enabled: true
readiness:
enabled: true
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 4Gi
env:
TZ: America/Chicago
DB_URL:
valueFrom:
secretKeyRef:
name: immich-secret
key: DATABASE_URI
envFrom:
- configMapRef:
name: immich-app-config
service:
app:
controller: immich-server
ports:
http:
port: 3001
ingress:
app:
enabled: true
className: external-nginx
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
external-dns.alpha.kubernetes.io/target: external.hsn.dev
nginx.ingress.kubernetes.io/proxy-body-size: "0"
hosts:
- host: &host "im.hsn.dev"
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts:
- *host
persistence:
media:
enabled: true
type: nfs
server: 10.1.1.13
path: /eru/media/immich
globalMounts:
- path: /usr/src/app/upload

View file

@ -0,0 +1,27 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./configmap.yaml
- ./externalsecret.yaml
- ./gatus.yaml
- ./helmrelease.yaml
- ./machine-learning
- ./microservices
- ./postgresCluster.yaml
- ./pushsecret.yaml
- ./service.yaml
configMapGenerator:
- name: immich-databse-init-sql
files:
- init.sql=./resources/init.sql
labels:
- pairs:
app.kubernetes.io/name: immich
app.kubernetes.io/instance: immich
app.kubernetes.io/part-of: immich
generatorOptions:
disableNameSuffixHash: true
annotations:
kustomize.toolkit.fluxcd.io/substitute: disabled

View file

@ -0,0 +1,82 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: immich-machine-learning
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
interval: 15m
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
immich-machine-learning:
annotations:
reloader.stakater.com/auto: "true"
strategy: Recreate
pod:
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
containers:
app:
image:
repository: ghcr.io/immich-app/immich-machine-learning
tag: v1.105.1
resources:
requests:
cpu: 15m
memory: 250Mi
limits:
memory: 4000Mi
probes:
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
liveness:
enabled: true
readiness:
enabled: true
envFrom:
- configMapRef:
name: immich-app-config
env:
DB_URL:
valueFrom:
secretKeyRef:
name: immich-secret
key: DATABASE_URI
service:
app:
controller: immich-machine-learning
ports:
http:
port: 3003
persistence:
media:
enabled: true
type: nfs
server: 10.1.1.13
path: /eru/media/immich
globalMounts:
- path: /usr/src/app/upload
cache:
enabled: true
type: emptyDir

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
labels:
- pairs:
app.kubernetes.io/name: immich-machine-learning
app.kubernetes.io/instance: immich-machine-learning
app.kubernetes.io/part-of: immich
resources:
- ./helmrelease.yaml

View file

@ -0,0 +1,80 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: immich-microservices
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
interval: 15m
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
immich-microservices:
strategy: Recreate
annotations:
reloader.stakater.com/auto: "true"
pod:
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
containers:
app:
image:
repository: ghcr.io/immich-app/immich-server
tag: v1.105.1
command: /bin/sh
args:
- ./start-microservices.sh
resources:
requests:
cpu: 100m
memory: 250Mi
limits:
memory: 4000Mi
probes:
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
liveness:
enabled: true
readiness:
enabled: true
envFrom:
- configMapRef:
name: immich-app-config
env:
DB_URL:
valueFrom:
secretKeyRef:
name: immich-secret
key: DATABASE_URI
service:
app:
controller: immich-microservices
enabled: false
persistence:
media:
enabled: true
type: nfs
server: 10.1.1.13
path: /eru/media/immich
globalMounts:
- path: /usr/src/app/upload

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
labels:
- pairs:
app.kubernetes.io/name: immich-microservices
app.kubernetes.io/instance: immich-microservices
app.kubernetes.io/part-of: immich
resources:
- ./helmrelease.yaml

View file

@ -0,0 +1,94 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/postgres-operator.crunchydata.com/postgrescluster_v1beta1.json
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
name: &name "${APP}"
spec:
postgresVersion: 16
dataSource:
pgbackrest:
stanza: db
configuration:
- secret:
name: pgo-s3-creds
global:
repo1-path: "/${APP}/repo1"
repo1-s3-uri-style: path
repo:
name: repo1
s3:
bucket: "crunchy-postgres"
endpoint: "s3.hsn.dev"
region: "us-east-1"
monitoring:
pgmonitor:
exporter:
# https://github.com/CrunchyData/postgres-operator-examples/blob/main/helm/install/values.yaml
image: registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi8-0.15.0-3
patroni:
dynamicConfiguration:
synchronous_mode: true
postgresql:
synchronous_commit: "on"
pg_hba:
- hostnossl all all 10.244.0.0/16 md5
- hostssl all all all md5
databaseInitSQL:
name: immich-databse-init-sql
key: init.sql
instances:
- name: postgres
metadata:
labels:
app.kubernetes.io/name: pgo-${APP}
replicas: 1
dataVolumeClaimSpec:
storageClassName: openebs-zfs
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
topologySpreadConstraints:
- maxSkew: 1
topologyKey: "kubernetes.io/hostname"
whenUnsatisfiable: "DoNotSchedule"
labelSelector:
matchLabels:
postgres-operator.crunchydata.com/cluster: ${APP}
postgres-operator.crunchydata.com/data: postgres
users:
- name: "immich"
databases:
- "immich"
options: "SUPERUSER"
password:
type: AlphaNumeric
backups:
pgbackrest:
configuration:
- secret:
name: pgo-s3-creds
global:
archive-push-queue-max: 4GiB
repo1-retention-full: "14"
repo1-retention-full-type: time
repo1-path: "/${APP}/repo1"
repo1-s3-uri-style: path
manual:
repoName: repo1
options:
- --type=full
metadata:
labels:
app.kubernetes.io/name: pgo-${APP}-backup
repos:
- name: repo1
schedules:
full: "0 1 * * 0"
differential: "0 1 * * 1-6"
s3:
bucket: "crunchy-postgres"
endpoint: "s3.hsn.dev"
region: "us-east-1"

View file

@ -0,0 +1,40 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/pushsecret_v1alpha1.json
apiVersion: external-secrets.io/v1alpha1
kind: PushSecret
metadata:
name: immich
spec:
refreshInterval: 1h
secretStoreRefs:
- name: onepassword-connect
kind: ClusterSecretStore
selector:
secret:
name: immich-pguser-immich
data:
- match:
secretKey: dbname
remoteRef:
remoteKey: immich
property: DATABASE_NAME
- match:
secretKey: host
remoteRef:
remoteKey: immich
property: DATABASE_HOST
- match:
secretKey: user
remoteRef:
remoteKey: immich
property: DATABASE_USER
- match:
secretKey: password
remoteRef:
remoteKey: immich
property: DATABASE_PASSWORD
- match:
secretKey: port
remoteRef:
remoteKey: immich
property: DATABASE_PORT

View file

@ -0,0 +1,4 @@
\c immich\\
CREATE EXTENSION vector;
CREATE EXTENSION cube;
CREATE EXTENSION earthdistance;

View file

@ -0,0 +1,20 @@
---
apiVersion: v1
kind: Service
metadata:
labels:
postgres-operator.crunchydata.com/cluster: immich
postgres-operator.crunchydata.com/role: primary
name: immich-primary-real
namespace: media
spec:
internalTrafficPolicy: Cluster
ports:
- name: postgres
port: 5432
protocol: TCP
targetPort: postgres
selector:
postgres-operator.crunchydata.com/cluster: immich
postgres-operator.crunchydata.com/role: master
type: ClusterIP

View file

@ -0,0 +1,30 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app immich
namespace: flux-system
spec:
targetNamespace: media
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: crunchy-postgres-operator
- name: external-secrets-stores
- name: dragonfly
path: ./kubernetes/apps/media/immich/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
DB_NAME: immich
DB_USER: immich

View file

@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Pre Flux-Kustomizations
- ./namespace.yaml
# Flux-Kustomizations
- ./immich/ks.yaml

View file

@ -0,0 +1,9 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: media
labels:
kustomize.toolkit.fluxcd.io/prune: disabled
volsync.backube/privileged-movers: "true"
pgo-enabled-hsn.dev: "true"

View file

@ -0,0 +1,58 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: alertmanager-silencer
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.3.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
dependsOn:
- name: kube-prometheus-stack
namespace: observability
values:
controllers:
alertmanager-silencer:
type: cronjob
cronjob:
schedule: "@daily"
containers:
app:
image:
repository: ghcr.io/onedr0p/kubanetics
tag: 2024.7.1@sha256:020ec6f00b9cdc0ee247d2fd34d3951ac32718326bb90c38e947eed9d555de6c
env:
SCRIPT_NAME: alertmanager-silencer.sh
ALERTMANAGER_URL: http://alertmanager-operated.observability.svc.cluster.local:9093
MATCHERS_0: alertname=NodeCPUHighUsage job=node-exporter
MATCHERS_1: alertname=CPUThrottlingHigh container=gc
MATCHERS_2: alertname=CPUThrottlingHigh container=worker
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities: { drop: ["ALL"] }
resources:
requests:
cpu: 25m
limits:
memory: 128Mi
pod:
securityContext:
runAsUser: 568
runAsGroup: 568
runAsNonRoot: true

View file

@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app alertmanager-silencer
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/observability/alertmanager-silencer/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m

View file

@ -0,0 +1,61 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: grafana-secret
namespace: observability
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: grafana-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "{{ .authentik_grafana_oauth_client_secret }}"
GF_DATE_FORMATS_USE_BROWSER_LOCALE: "true"
GF_SERVER_ROOT_URL: https://grafana.hsn.dev
GF_DATABASE_NAME: "{{ .grafana_GF_DATABASE_NAME }}"
GF_DATABASE_HOST: "postgres-primary-real.database.svc"
GF_DATABASE_USER: "{{ .grafana_GF_DATABASE_USER }}"
GF_DATABASE_PASSWORD: "{{ .grafana_GF_DATABASE_PASSWORD }}"
GF_DATABASE_SSL_MODE: "require"
GF_DATABASE_TYPE: postgres
GF_ANALYTICS_CHECK_FOR_UPDATES: "false"
GF_ANALYTICS_CHECK_FOR_PLUGIN_UPDATES: "false"
GF_ANALYTICS_REPORTING_ENABLED: "false"
GF_AUTH_ANONYMOUS_ENABLED: "false"
GF_AUTH_BASIC_ENABLED: "false"
GF_AUTH_GENERIC_OAUTH_ENABLED: "true"
GF_AUTH_GENERIC_OAUTH_API_URL: https://auth.hsn.dev/application/o/userinfo/
GF_AUTH_GENERIC_OAUTH_AUTH_URL: https://auth.hsn.dev/application/o/authorize/
GF_AUTH_GENERIC_OAUTH_TOKEN_URL: https://auth.hsn.dev/application/o/token/
GF_AUTH_GENERIC_OAUTH_CLIENT_ID: CoV7ae1HxuNzwCbVPf3U7TfYMX2rVqC5T9RAUo5M
GF_AUTH_GENERIC_OAUTH_EMPTY_SCOPES: "false"
GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups[*], 'Grafana Admins') && 'Admin' || contains(groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'"
GF_AUTH_GENERIC_OAUTH_SCOPES: openid profile email groups
GF_AUTH_OAUTH_AUTO_LOGIN: "true"
GF_EXPLORE_ENABLED: "true"
GF_FEATURE_TOGGLES_ENABLE: publicDashboards
GF_LOG_MODE: console
GF_NEWS_NEWS_FEED_ENABLED: "false"
GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS: natel-discrete-panel,pr0ps-trackmap-panel,panodata-map-panel
GF_SECURITY_COOKIE_SAMESITE: grafana
GF_SECURITY_ANGULAR_SUPPORT_ENABLED: "true"
dataFrom:
- extract:
key: Authentik
rewrite:
- regexp:
source: "(.*)"
target: "authentik_$1"
- extract:
key: grafana
rewrite:
- regexp:
source: "(.*)"
target: "grafana_$1"

View file

@ -0,0 +1,401 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: grafana
spec:
interval: 30m
chart:
spec:
chart: grafana
version: 8.3.7
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
dependsOn:
- name: kube-prometheus-stack
namespace: observability
- name: loki
namespace: observability
values:
replicas: 1
envFromSecret: grafana-secret
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default-folder
- name: ceph
orgId: 1
folder: Ceph
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/ceph-folder
- name: crunchy-postgres
orgId: 1
folder: Crunchy-postgres
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/crunchy-postgres-folder
- name: flux
orgId: 1
folder: Flux
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/flux-folder
- name: kubernetes
orgId: 1
folder: Kubernetes
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/kubernetes-folder
- name: nginx
orgId: 1
folder: Nginx
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/nginx-folder
- name: prometheus
orgId: 1
folder: Prometheus
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/prometheus-folder
- name: thanos
orgId: 1
folder: Thanos
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/thanos-folder
- name: unifi
orgId: 1
folder: Unifi
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/unifi-folder
datasources:
datasources.yaml:
apiVersion: 1
deleteDatasources:
- { name: Alertmanager, orgId: 1 }
- { name: Loki, orgId: 1 }
- { name: Prometheus, orgId: 1 }
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
url: http://thanos-query-frontend.observability.svc.cluster.local:10902
jsonData:
prometheusType: Thanos
timeInterval: 1m
isDefault: true
- name: Loki
type: loki
uid: loki
access: proxy
url: http://loki-gateway.observability.svc.cluster.local
jsonData:
maxLines: 250
- name: Alertmanager
type: alertmanager
uid: alertmanager
access: proxy
url: http://alertmanager-operated.observability.svc.cluster.local:9093
jsonData:
implementation: prometheus
dashboards:
default:
cloudflared:
# renovate: depName="Cloudflare Tunnels (cloudflared)"
gnetId: 17457
revision: 6
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
external-dns:
# renovate: depName="External-dns"
gnetId: 15038
revision: 3
datasource: Prometheus
minio:
# renovate: depName="MinIO Dashboard"
gnetId: 13502
revision: 25
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
node-exporter-full:
# renovate: depName="Node Exporter Full"
gnetId: 1860
revision: 33
datasource: Prometheus
postgres:
# renovate: depName="PostgreSQL Database"
gnetId: 9628
revision: 7
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
smartctl-exporter:
# renovate: depName="smartctl_exporter"
gnetId: 20204
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
spegel:
# renovate: depName="Spegel"
gnetId: 18089
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
unpackerr:
# renovate: depName="Unpackerr"
gnetId: 18817
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
zfs:
# renovate: depName="ZFS"
gnetId: 7845
revision: 4
datasource: Prometheus
dragonflydb:
url: https://raw.githubusercontent.com/dragonflydb/dragonfly/main/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Prometheus
external-secrets:
url: https://raw.githubusercontent.com/external-secrets/external-secrets/main/docs/snippets/dashboard.json
datasource: Prometheus
node-feature-discovery:
url: https://raw.githubusercontent.com/kubernetes-sigs/node-feature-discovery/master/examples/grafana-dashboard.json
datasource: Prometheus
crunchy-postgres:
pgbackrest:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/pgbackrest.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
pods:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/pod_details.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/postgresql_details.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql-overview:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/postgresql_overview.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql-health:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/postgresql_service_health.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql-alerts:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/prometheus_alerts.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
query-stats:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/query_statistics.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
ceph:
ceph-cluster:
# renovate: depName="Ceph Cluster"
gnetId: 2842
revision: 17
datasource: Prometheus
ceph-osd:
# renovate: depName="Ceph - OSD (Single)"
gnetId: 5336
revision: 9
datasource: Prometheus
ceph-pools:
# renovate: depName="Ceph - Pools"
gnetId: 5342
revision: 9
datasource: Prometheus
flux:
flux-cluster:
url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/cluster.json
datasource: Prometheus
flux-control-plane:
url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/control-plane.json
datasource: Prometheus
kubernetes:
kubernetes-api-server:
# renovate: depName="Kubernetes / System / API Server"
gnetId: 15761
revision: 16
datasource: Prometheus
kubernetes-coredns:
# renovate: depName="Kubernetes / System / CoreDNS"
gnetId: 15762
revision: 17
datasource: Prometheus
kubernetes-global:
# renovate: depName="Kubernetes / Views / Global"
gnetId: 15757
revision: 37
datasource: Prometheus
kubernetes-namespaces:
# renovate: depName="Kubernetes / Views / Namespaces"
gnetId: 15758
revision: 34
datasource: Prometheus
kubernetes-nodes:
# renovate: depName="Kubernetes / Views / Nodes"
gnetId: 15759
revision: 29
datasource: Prometheus
kubernetes-pods:
# renovate: depName="Kubernetes / Views / Pods"
gNetId: 15760
revision: 21
datasource: Prometheus
kubernetes-volumes:
# renovate: depName="K8s / Storage / Volumes / Cluster"
gnetId: 11454
revision: 14
datasource: Prometheus
nginx:
nginx:
url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/nginx.json
datasource: Prometheus
nginx-request-handling-performance:
url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/request-handling-performance.json
datasource: Prometheus
prometheus:
prometheus:
# renovate: depName="Prometheus"
gnetId: 19105
revision: 3
datasource: Prometheus
thanos:
thanos-bucket-replicate:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/bucket-replicate.json
datasource: Prometheus
thanos-compact:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/compact.json
datasource: Prometheus
thanos-overview:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/overview.json
datasource: Prometheus
thanos-query:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/query.json
datasource: Prometheus
thanos-query-frontend:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/query-frontend.json
datasource: Prometheus
thanos-receieve:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/receive.json
datasource: Prometheus
thanos-rule:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/rule.json
datasource: Prometheus
thanos-sidecar:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/sidecar.json
datasource: Prometheus
thanos-store:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/store.json
datasource: Prometheus
unifi:
unifi-insights:
# renovate: depName="UniFi-Poller: Client Insights - Prometheus"
gnetId: 11315
revision: 9
datasource: Prometheus
unifi-network-sites:
# renovate: depName="UniFi-Poller: Network Sites - Prometheus"
gnetId: 11311
revision: 5
datasource: Prometheus
unifi-uap:
# renovate: depName="UniFi-Poller: UAP Insights - Prometheus"
gnetId: 11314
revision: 10
datasource: Prometheus
unifi-usw:
# renovate: depName="UniFi-Poller: USW Insights - Prometheus"
gnetId: 11312
revision: 9
datasource: Prometheus
sidecar:
dashboards:
enabled: true
searchNamespace: ALL
labelValue: ""
label: grafana_dashboard
folderAnnotation: grafana_folder
provider:
disableDelete: true
foldersFromFilesStructure: true
datasources:
enabled: true
searchNamespace: ALL
labelValue: ""
plugins:
- grafana-clock-panel
- grafana-piechart-panel
- grafana-worldmap-panel
- natel-discrete-panel
- pr0ps-trackmap-panel
- vonage-status-panel
serviceMonitor:
enabled: true
ingress:
enabled: true
ingressClassName: external-nginx
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
external-dns.alpha.kubernetes.io/target: external.hsn.dev
hosts:
- &host grafana.hsn.dev
tls:
- hosts:
- *host
persistence:
enabled: false
testFramework:
enabled: false
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: grafana

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml

View file

@ -0,0 +1,29 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app grafana
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: crunchy-postgres-operator
- name: external-secrets-stores
path: ./kubernetes/apps/observability/grafana/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
DB_NAME: grafana
DB_USER: grafana

View file

@ -0,0 +1,22 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: alertmanager
spec:
refreshInterval: 5m
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: alertmanager-secret
template:
templateFrom:
- configMap:
name: alertmanager-config-tpl
items:
- key: alertmanager.yaml
dataFrom:
- extract:
key: pushover

View file

@ -0,0 +1,190 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: kube-prometheus-stack
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: kube-prometheus-stack
version: 61.6.0
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: flux-system
install:
crds: CreateReplace
remediation:
retries: 3
upgrade:
cleanupOnFail: true
crds: CreateReplace
remediation:
strategy: rollback
retries: 3
values:
crds:
enabled: true
cleanPrometheusOperatorObjectNames: true
alertmanager:
ingress:
enabled: true
pathType: Prefix
ingressClassName: internal-nginx
hosts:
- &host alertmanager.jahanson.tech
tls:
- hosts:
- *host
alertmanagerSpec:
replicas: 1
useExistingSecret: true
configSecret: alertmanager-secret
storage:
volumeClaimTemplate:
spec:
storageClassName: openebs-hostpath
resources:
requests:
storage: 1Gi
kubelet:
enabled: true
serviceMonitor:
metricRelabelings:
# Drop high cardinality labels
- action: labeldrop
regex: (uid)
- action: labeldrop
regex: (id|name)
- action: drop
sourceLabels: ["__name__"]
regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
kubeApiServer:
enabled: true
serviceMonitor:
metricRelabelings:
# Drop high cardinality labels
- action: drop
sourceLabels: ["__name__"]
regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
- action: drop
sourceLabels: ["__name__"]
regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
kubeControllerManager:
enabled: true
endpoints: &cp
- 10.1.1.61
kubeEtcd:
enabled: true
endpoints: *cp
kubeScheduler:
enabled: true
endpoints: *cp
kubeProxy:
enabled: false
prometheus:
ingress:
enabled: true
ingressClassName: internal-nginx
pathType: Prefix
hosts:
- &host prometheus.jahanson.tech
tls:
- hosts:
- *host
thanosService:
enabled: true
thanosServiceMonitor:
enabled: true
# thanosServiceExternal:
# enabled: true
# type: LoadBalancer
# annotations:
# external-dns.alpha.kubernetes.io/hostname: thanos.jahanson.tech
# io.cilium/lb-ipam-ips: 10.45.0.6
# externalTrafficPolicy: Cluster
prometheusSpec:
podMetadata:
annotations:
secret.reloader.stakater.com/reload: &secret thanos-objstore-config
replicas: 1
replicaExternalLabelName: __replica__
scrapeInterval: 1m # Must match interval in Grafana Helm chart
ruleSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
probeSelectorNilUsesHelmValues: false
scrapeConfigSelectorNilUsesHelmValues: false
enableAdminAPI: true
walCompression: true
enableFeatures:
- auto-gomemlimit
- memory-snapshot-on-shutdown
- new-service-discovery-manager
image:
registry: quay.io
repository: prometheus/prometheus
tag: v2.51.0-dedupelabels
thanos:
image: quay.io/thanos/thanos:${THANOS_VERSION}
version: "${THANOS_VERSION#v}"
objectStorageConfig:
existingSecret:
name: *secret
key: config
retention: 2d
retentionSize: 15GB
externalLabels:
cluster: main
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: openebs-hostpath
resources:
requests:
storage: 20Gi
nodeExporter:
enabled: true
prometheus-node-exporter:
fullnameOverride: node-exporter
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
kubeStateMetrics:
enabled: true
kube-state-metrics:
fullnameOverride: kube-state-metrics
metricLabelsAllowlist:
- pods=[*]
- deployments=[*]
- persistentvolumeclaims=[*]
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
grafana:
enabled: false
forceDeployDashboards: true
sidecar:
dashboards:
annotations:
grafana_folder: Kubernetes
multicluster:
etcd:
enabled: true

View file

@ -0,0 +1,16 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml
- ./prometheusrules
# - ./scrapeconfigs
- ./podmonitors
configMapGenerator:
- name: alertmanager-config-tpl
files:
- alertmanager.yaml=./resources/alertmanager.yaml
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,34 @@
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/podmonitor_v1.json
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: crunchy-postgres-exporter
spec:
selector:
matchLabels:
postgres-operator.crunchydata.com/crunchy-postgres-exporter: 'true'
namespaceSelector:
matchNames:
- database
- media
podMetricsEndpoints:
- port: "exporter"
relabelings:
- sourceLabels: [__meta_kubernetes_pod_container_port_number]
action: keep
regex: "9187"
- sourceLabels: [__meta_kubernetes_namespace]
targetLabel: kubernetes_namespace
- sourceLabels: [__meta_kubernetes_pod_name]
targetLabel: pod
- sourceLabels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_postgres_operator_crunchydata_com_cluster]
separator: ":"
targetLabel: pg_cluster
replacement: "$1$2"
- sourceLabels: [__meta_kubernetes_pod_ip]
targetLabel: ip
- sourceLabels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_instance]
targetLabel: deployment
- sourceLabels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_role]
targetLabel: role

View file

@ -0,0 +1,19 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/podmonitor_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: dragonflydb-metrics
namespace: database
spec:
selector:
matchLabels:
app.kubernetes.io/name: dragonfly
app: dragonfly
podTargetLabels:
- app
namespaceSelector:
matchNames:
- database
podMetricsEndpoints:
- port: admin

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./crunchy-postgres.yaml
- ./dragonflydb.yaml

View file

@ -0,0 +1,6 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./prometheusrule.yaml

View file

@ -0,0 +1,37 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: miscellaneous-rules
labels:
prometheus: k8s
role: alert-rules
spec:
groups:
- name: dockerhub
rules:
- alert: BootstrapRateLimitRisk
annotations:
summary: Kubernetes cluster at risk of being rate limited by dockerhub on bootstrap
expr: count(time() - container_last_seen{image=~"(docker.io).*",container!=""} < 30) > 100
for: 15m
labels:
severity: critical
- name: oom
rules:
- alert: OOMKilled
annotations:
summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
labels:
severity: critical
- name: zfs
rules:
- alert: ZfsUnexpectedPoolState
annotations:
summary: ZFS pool {{$labels.zpool}} on {{$labels.instance}} is in a unexpected state {{$labels.state}}
expr: node_zfs_zpool_state{state!="online"} > 0
for: 15m
labels:
severity: critical

View file

@ -0,0 +1,68 @@
---
global:
resolve_timeout: 5m
route:
group_by: ["alertname", "job"]
group_interval: 10m
group_wait: 1m
receiver: pushover
repeat_interval: 12h
routes:
- receiver: heartbeat
group_interval: 5m
group_wait: 0s
matchers:
- alertname =~ "Watchdog"
repeat_interval: 5m
- receiver: "null"
matchers:
- alertname =~ "InfoInhibitor"
- receiver: pushover
continue: true
matchers:
- severity = "critical"
inhibit_rules:
- equal: ["alertname", "namespace"]
source_matchers:
- severity = "critical"
target_matchers:
- severity = "warning"
receivers:
- name: heartbeat
webhook_configs:
- send_resolved: true
url: "{{ .alertmanager_heartbeat_url }}"
- name: "null"
- name: pushover
pushover_configs:
- html: true
# Compooters are hard
message: |-
{{ "{{-" }} range .Alerts {{ "}}" }}
{{ "{{-" }} if ne .Annotations.description "" {{ "}}" }}
{{ "{{" }} .Annotations.description {{ "}}" }}
{{ "{{-" }} else if ne .Annotations.summary "" {{ "}}" }}
{{ "{{" }} .Annotations.summary {{ "}}" }}
{{ "{{-" }} else if ne .Annotations.message "" {{ "}}" }}
{{ "{{" }} .Annotations.message {{ "}}" }}
{{ "{{-" }} else {{ "}}" }}
Alert description not available
{{ "{{-" }} end {{ "}}" }}
{{ "{{-" }} if gt (len .Labels.SortedPairs) 0 {{ "}}" }}
<small>
{{ "{{-" }} range .Labels.SortedPairs {{ "}}" }}
<b>{{ "{{" }} .Name {{ "}}" }}:</b> {{ "{{" }} .Value {{ "}}" }}
{{ "{{-" }} end {{ "}}" }}
</small>
{{ "{{-" }} end {{ "}}" }}
{{ "{{-" }} end {{ "}}" }}
priority: |-
{{ "{{" }} if eq .Status "firing" {{ "}}" }}1{{ "{{" }} else {{ "}}" }}0{{ "{{" }} end {{ "}}" }}
send_resolved: true
sound: gamelan
title: >-
{{ "{{" }} .CommonLabels.alertname {{ "}}" }}
[{{ "{{" }} .Status | toUpper {{ "}}" }}{{ "{{" }} if eq .Status "firing" {{ "}}" }}:{{ "{{" }} .Alerts.Firing | len {{ "}}" }}{{ "{{" }} end {{ "}}" }}]
token: "{{ .alertmanager_token }}"
url_title: View in Alertmanager
user_key: "{{ .userkey_jahanson }}"

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./node-exporter.yaml
- ./zfs-exporter.yaml

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
name: node-exporter
spec:
staticConfigs:
- targets:
- 10.1.1.1:9100
metricsPath: /metrics

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
name: zfs-exporter
spec:
staticConfigs:
- targets:
- 10.1.1.13:9134
metricsPath: /metrics

View file

@ -0,0 +1,29 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app kube-prometheus-stack
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: openebs
- name: volsync
path: ./kubernetes/apps/observability/kube-prometheus-stack/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 15m
postBuild:
substitute:
# renovate: datasource=docker depName=quay.io/thanos/thanos
THANOS_VERSION: v0.34.1

View file

@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: loki
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: loki-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
S3_HOST: s3.hsn.dev
S3_BUCKET: "{{ .minio_thanos_bucket_name }}"
S3_ACCESS_KEY: "{{ .minio_loki_access_key }}"
S3_SECRET_KEY: "{{ .minio_loki_secret_key }}"
S3_REGION: us-east-1
dataFrom:
- extract:
key: minio
rewrite:
- regexp:
source: "(.*)"
target: "minio_$1"

View file

@ -0,0 +1,138 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: loki
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: loki
version: 6.7.3
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: uninstall
retries: 3
valuesFrom:
- targetPath: loki.storage.bucketNames.chunks
kind: Secret
name: loki-secret
valuesKey: S3_BUCKET
- targetPath: loki.storage.s3.endpoint
kind: Secret
name: loki-secret
valuesKey: S3_HOST
- targetPath: loki.storage.s3.region
kind: Secret
name: loki-secret
valuesKey: S3_REGION
- targetPath: loki.storage.s3.accessKeyId
kind: Secret
name: loki-secret
valuesKey: S3_ACCESS_KEY
- targetPath: loki.storage.s3.secretAccessKey
kind: Secret
name: loki-secret
valuesKey: S3_SECRET_KEY
values:
deploymentMode: SimpleScalable
loki:
podAnnotations:
secret.reloader.stakater.com/reload: loki-secret
ingester:
chunk_encoding: snappy
storage:
type: s3
s3:
s3ForcePathStyle: true
insecure: true
schemaConfig:
configs:
- from: "2024-04-01"
store: tsdb
object_store: s3
schema: v13
index:
prefix: loki_index_
period: 24h
structuredConfig:
auth_enabled: false
server:
log_level: info
http_listen_port: 3100
grpc_listen_port: 9095
grpc_server_max_recv_msg_size: 8388608
grpc_server_max_send_msg_size: 8388608
limits_config:
ingestion_burst_size_mb: 128
ingestion_rate_mb: 64
max_query_parallelism: 100
per_stream_rate_limit: 64M
per_stream_rate_limit_burst: 128M
reject_old_samples: true
reject_old_samples_max_age: 168h
retention_period: 30d
shard_streams:
enabled: true
split_queries_by_interval: 1h
query_scheduler:
max_outstanding_requests_per_tenant: 4096
frontend:
max_outstanding_per_tenant: 4096
ruler:
enable_api: true
enable_alertmanager_v2: true
alertmanager_url: http://alertmanager-operated.observability.svc.cluster.local:9093
storage:
type: local
local:
directory: /rules
rule_path: /rules/fake
analytics:
reporting_enabled: false
backend:
replicas: 1
persistence:
size: 20Gi
storageClass: openebs-hostpath
gateway:
replicas: 1
image:
registry: ghcr.io
ingress:
enabled: true
ingressClassName: internal-nginx
hosts:
- host: &host loki.jahanson.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts: [*host]
read:
replicas: 1
write:
replicas: 1
persistence:
size: 20Gi
storageClass: openebs-hostpath
sidecar:
image:
repository: ghcr.io/kiwigrid/k8s-sidecar
rules:
searchNamespace: ALL
folder: /rules/fake
lokiCanary:
enabled: false
test:
enabled: false

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml

View file

@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app loki
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: openebs
- name: vector
path: ./kubernetes/apps/observability/loki/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 15m

View file

@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: thanos
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: thanos-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
S3_HOST: s3.hsn.dev
S3_BUCKET: "{{ .minio_thanos_bucket_name }}"
S3_ACCESS_KEY: "{{ .minio_thanos_access_key }}"
S3_SECRET_KEY: "{{ .minio_thanos_secret_key }}"
S3_REGION: us-east-1
dataFrom:
- extract:
key: Minio
rewrite:
- regexp:
source: "(.*)"
target: "minio_$1"

View file

@ -0,0 +1,120 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: thanos
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: thanos
version: 1.17.2
sourceRef:
kind: HelmRepository
name: stevehipwell
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
valuesFrom:
- targetPath: objstoreConfig.value.config.bucket
kind: Secret
name: thanos-secret
valuesKey: S3_BUCKET
- targetPath: objstoreConfig.value.config.endpoint
kind: Secret
name: thanos-secret
valuesKey: S3_HOST
- targetPath: objstoreConfig.value.config.region
kind: Secret
name: thanos-secret
valuesKey: S3_REGION
- targetPath: objstoreConfig.value.config.access_key
kind: Secret
name: thanos-secret
valuesKey: S3_ACCESS_KEY
- targetPath: objstoreConfig.value.config.secret_key
kind: Secret
name: thanos-secret
valuesKey: S3_SECRET_KEY
values:
objstoreConfig:
value:
type: s3
config:
insecure: false
additionalEndpoints:
- dnssrv+_grpc._tcp.kube-prometheus-stack-thanos-discovery.observability.svc.cluster.local
additionalReplicaLabels: ["__replica__"]
serviceMonitor:
enabled: true
compact:
enabled: true
extraArgs:
- --compact.concurrency=4
- --delete-delay=30m
- --retention.resolution-raw=14d
- --retention.resolution-5m=30d
- --retention.resolution-1h=60d
persistence: &persistence
enabled: true
storageClass: openebs-hostpath
size: 10Gi
query:
replicas: 1
extraArgs: ["--alert.query-url=https://thanos.jahanson.tech"]
queryFrontend:
enabled: true
replicas: 1
extraEnv: &extraEnv
- name: THANOS_CACHE_CONFIG
valueFrom:
configMapKeyRef:
name: &configMap thanos-cache-configmap
key: cache.yaml
extraArgs: ["--query-range.response-cache-config=$(THANOS_CACHE_CONFIG)"]
ingress:
enabled: true
ingressClassName: internal-nginx
hosts:
- &host thanos.jahanson.tech
tls:
- hosts: [*host]
podAnnotations: &podAnnotations
configmap.reloader.stakater.com/reload: *configMap
rule:
enabled: true
replicas: 1
extraArgs: ["--web.prefix-header=X-Forwarded-Prefix"]
alertmanagersConfig:
value: |-
alertmanagers:
- api_version: v2
static_configs:
- dnssrv+_http-web._tcp.alertmanager-operated.observability.svc.cluster.local
rules:
value: |-
groups:
- name: PrometheusWatcher
rules:
- alert: PrometheusDown
annotations:
summary: A Prometheus has disappeared from Prometheus target discovery
expr: absent(up{job="kube-prometheus-stack-prometheus"})
for: 5m
labels:
severity: critical
persistence: *persistence
storeGateway:
replicas: 1
extraEnv: *extraEnv
extraArgs: ["--index-cache.config=$(THANOS_CACHE_CONFIG)"]
persistence: *persistence
podAnnotations: *podAnnotations

View file

@ -0,0 +1,13 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
- ./externalsecret.yaml
configMapGenerator:
- name: thanos-cache-configmap
files:
- cache.yaml=./resources/cache.yml
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,5 @@
---
type: REDIS
config:
addr: dragonfly.database.svc.cluster.local:6379
db: 1

View file

@ -0,0 +1,25 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app thanos
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: openebs
- name: dragonfly-operator
path: ./kubernetes/apps/observability/thanos/app
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 15m

View file

@ -0,0 +1,103 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: vector-agent
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: app-template
version: 3.3.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
dependsOn:
- name: vector-aggregator
namespace: observability
values:
controllers:
vector:
type: daemonset
strategy: RollingUpdate
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: docker.io/timberio/vector
tag: 0.40.0-alpine@sha256:7a81fdd62e056321055a9e4bdec4073d752ecf68f4c192e676b85001721523c2
env:
PROCFS_ROOT: /host/proc
SYSFS_ROOT: /host/sys
VECTOR_SELF_NODE_NAME:
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
VECTOR_SELF_POD_NAME:
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
VECTOR_SELF_POD_NAMESPACE:
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
args: ["--config", "/etc/vector/vector.yaml"]
securityContext:
privileged: true
serviceAccount:
create: true
name: vector-agent
persistence:
config:
enabled: true
type: configMap
name: vector-agent-configmap
globalMounts:
- path: /etc/vector/vector.yaml
subPath: vector.yaml
readOnly: true
data:
type: emptyDir
globalMounts:
- path: /vector-data-dir
procfs:
type: hostPath
hostPath: /proc
hostPathType: Directory
globalMounts:
- path: /host/proc
readOnly: true
sysfs:
type: hostPath
hostPath: /sys
hostPathType: Directory
globalMounts:
- path: /host/sys
readOnly: true
var-lib:
type: hostPath
hostPath: /var/lib
hostPathType: Directory
globalMounts:
- readOnly: true
var-log:
type: hostPath
hostPath: /var/log
hostPathType: Directory
globalMounts:
- readOnly: true

View file

@ -0,0 +1,13 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
- ./rbac.yaml
configMapGenerator:
- name: vector-agent-configmap
files:
- vector.yaml=./resources/vector.yaml
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,22 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: vector-agent
rules:
- apiGroups: [""]
resources: ["namespaces", "nodes", "pods"]
verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: vector-agent
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: vector-agent
subjects:
- kind: ServiceAccount
name: vector-agent
namespace: observability

View file

@ -0,0 +1,25 @@
---
data_dir: /vector-data-dir
sources:
kubernetes_source:
type: kubernetes_logs
use_apiserver_cache: true
pod_annotation_fields:
container_image: container_image
container_name: container_name
pod_labels: pod_labels
pod_name: pod_name
pod_annotations: ""
namespace_annotation_fields:
namespace_labels: ""
node_annotation_fields:
node_labels: ""
sinks:
kubernetes:
type: vector
compression: true
version: "2"
address: vector-aggregator.observability.svc.cluster.local:6010
inputs: ["kubernetes_source"]

View file

@ -0,0 +1,20 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: vector-aggregator
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: vector-aggregator-secret
template:
engineVersion: v2
data:
GEOIPUPDATE_ACCOUNT_ID: "{{ .account_id }}"
GEOIPUPDATE_LICENSE_KEY: "{{ .vector_license_key }}"
dataFrom:
- extract:
key: maxmind

View file

@ -0,0 +1,91 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app vector-aggregator
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: app-template
version: 3.3.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
vector-aggregator:
replicas: 1
strategy: RollingUpdate
annotations:
reloader.stakater.com/auto: "true"
initContainers:
init-geoip:
image:
repository: ghcr.io/maxmind/geoipupdate
tag: v7.0.1@sha256:80c57598a9ff552953e499cefc589cfe7b563d64262742ea42f2014251b557b0
env:
GEOIPUPDATE_EDITION_IDS: GeoLite2-City
GEOIPUPDATE_FREQUENCY: "0"
GEOIPUPDATE_VERBOSE: "1"
envFrom:
- secretRef:
name: vector-aggregator-secret
containers:
app:
image:
repository: docker.io/timberio/vector
tag: 0.40.0-alpine@sha256:7a81fdd62e056321055a9e4bdec4073d752ecf68f4c192e676b85001721523c2
args: ["--config", "/etc/vector/vector.yaml"]
pod:
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: *app
service:
app:
controller: vector-aggregator
type: LoadBalancer
annotations:
external-dns.alpha.kubernetes.io/hostname: vector.jahanson.tech
io.cilium/lb-ipam-ips: 10.1.1.33
ports:
http:
port: 8686
journald:
port: 6000
kubernetes:
port: 6010
vyos:
port: 6020
persistence:
config:
enabled: true
type: configMap
name: vector-aggregator-configmap
globalMounts:
- path: /etc/vector/vector.yaml
subPath: vector.yaml
readOnly: true
data:
type: emptyDir
globalMounts:
- path: /vector-data-dir
geoip:
type: emptyDir
globalMounts:
- path: /usr/share/GeoIP

View file

@ -0,0 +1,13 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml
configMapGenerator:
- name: vector-aggregator-configmap
files:
- vector.yaml=./resources/vector.yaml
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,132 @@
---
data_dir: /vector-data-dir
api:
enabled: true
address: 0.0.0.0:8686
enrichment_tables:
geoip_table:
type: geoip
path: /usr/share/GeoIP/GeoLite2-City.mmdb
#
# Sources
#
sources:
journald_source:
type: vector
version: "2"
address: 0.0.0.0:6000
kubernetes_source:
type: vector
version: "2"
address: 0.0.0.0:6010
vyos_source:
type: syslog
address: 0.0.0.0:6020
mode: tcp
#
# Transforms
#
transforms:
kubernetes_remap:
type: remap
inputs: ["kubernetes_source"]
source: |
# Standardize 'app' index
.custom_app_name = .pod_labels."app.kubernetes.io/name" || .pod_labels.app || .pod_labels."k8s-app" || "unknown"
# Drop pod_labels
del(.pod_labels)
# [63950.153039] [wan-local-default-D]IN=eth4 OUT= MAC=xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx SRC=xxx.xxx.xxx.xxx DST=xxx.xxx.xxx.xxx LEN=40 TOS=0x00 PREC=0x00 TTL=240 ID=60610 PROTO=TCP SPT=53451 DPT=2002 WINDOW=1024 RES=0x00 SYN URGP=0
vyos_firewall_route:
type: route
inputs: ["vyos_source"]
route:
firewall: |
.facility == "kern" && match!(.message, r'^\[(.*?)\].(.*)')
vyos_firewall_remap:
type: remap
inputs: ["vyos_firewall_route.firewall"]
source: |
# Parse firewall rule message
split_message, split_err = parse_regex(.message, r'^\[.*\].\[(?P<rule>.*?)\](?P<fields>.*)')
if split_err != null {
abort
}
# Extract separate fields from message
split_message.fields, split_err = strip_whitespace(split_message.fields)
if split_err != null {
abort
}
.message, parse_err = parse_key_value(split_message.fields, whitespace: "strict")
if parse_err != null {
abort
}
# Add more information about the triggered rule
.message.RULE, parse_err = parse_regex(split_message.rule, r'^ipv4-(?P<from_zone>\w+)-(?P<to_zone>\w+)-(?P<id>\w+)-(?P<action>\w+)$')
if parse_err != null {
abort
}
vyos_firewall_wan_route:
type: route
inputs: ["vyos_firewall_remap"]
route:
from_wan: .message.RULE.from_zone == "wan"
vyos_firewall_geoip_remap:
type: remap
inputs: ["vyos_firewall_wan_route.from_wan"]
source: |
.geoip = get_enrichment_table_record!(
"geoip_table", {
"ip": .message.SRC
}
)
#
# Sinks
#
sinks:
journald:
inputs: ["journald_source"]
type: loki
endpoint: http://loki-gateway.observability.svc.cluster.local
encoding: { codec: json }
out_of_order_action: accept
remove_label_fields: true
remove_timestamp: true
labels:
hostname: '{{ host }}'
kubernetes:
inputs: ["kubernetes_remap"]
type: loki
endpoint: http://loki-gateway.observability.svc.cluster.local
encoding: { codec: json }
out_of_order_action: accept
remove_label_fields: true
remove_timestamp: true
labels:
app: '{{ custom_app_name }}'
namespace: '{{ kubernetes.pod_namespace }}'
node: '{{ kubernetes.pod_node_name }}'
vyos:
inputs: ["vyos_source", "vyos_firewall_geoip_remap"]
type: loki
endpoint: http://loki-gateway.observability.svc.cluster.local
encoding: { codec: json }
out_of_order_action: accept
remove_label_fields: true
remove_timestamp: true
labels:
hostname: '{{ host }}'

Some files were not shown because too many files have changed in this diff Show more