Compare commits

1 commit

Author SHA1 Message Date
afcfde4606 revert 2024-08-08 10:56:18 -05:00
563 changed files with 27530 additions and 17837 deletions

.ansible-lint Normal file

@@ -0,0 +1,9 @@
---
skip_list:
- yaml[line-length]
- var-naming
warn_list:
- command-instead-of-shell
- deprecated-command-syntax
- experimental
- no-changed-when
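
ansible-lint discovers this file automatically when invoked from the repository root; a minimal sketch of running it through the venv that the Taskfile below builds (the target path is an assumption):

# assumes `task deps` (below) has created .venv; "ansible/" is a hypothetical path
.venv/bin/ansible-lint ansible/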

@@ -0,0 +1,52 @@
---
# yaml-language-server: $schema=https://taskfile.dev/schema.json
version: "3"
vars:
PYTHON_BIN: python3
env:
PATH: "{{.ROOT_DIR}}/.venv/bin:$PATH"
VIRTUAL_ENV: "{{.ROOT_DIR}}/.venv"
ANSIBLE_COLLECTIONS_PATH: "{{.ROOT_DIR}}/.venv/galaxy"
ANSIBLE_ROLES_PATH: "{{.ROOT_DIR}}/.venv/galaxy/ansible_roles"
ANSIBLE_VARS_ENABLED: "host_group_vars,community.sops.sops"
tasks:
deps:
desc: Set up Ansible dependencies for the environment
cmds:
- task: .venv
run:
desc: Run an Ansible playbook for configuring a cluster
summary: |
Args:
cluster: Cluster to run command against (required)
playbook: Playbook to run (required)
prompt: Run Ansible playbook '{{.playbook}}' against the '{{.cluster}}' cluster... continue?
deps: ["deps"]
cmd: |
.venv/bin/ansible-playbook \
--inventory {{.ANSIBLE_DIR}}/{{.cluster}}/inventory/hosts.yaml \
{{.ANSIBLE_DIR}}/{{.cluster}}/playbooks/{{.playbook}}.yaml {{.CLI_ARGS}}
preconditions:
- { msg: "Argument (cluster) is required", sh: "test -n {{.cluster}}" }
- { msg: "Argument (playbook) is required", sh: "test -n {{.playbook}}" }
- { msg: "Venv not found", sh: "test -d {{.ROOT_DIR}}/.venv" }
- { msg: "Inventory not found", sh: "test -f {{.ANSIBLE_DIR}}/{{.cluster}}/inventory/hosts.yaml" }
- { msg: "Playbook not found", sh: "test -f {{.ANSIBLE_DIR}}/{{.cluster}}/playbooks/{{.playbook}}.yaml" }
.venv:
internal: true
cmds:
- true && {{.PYTHON_BIN}} -m venv {{.ROOT_DIR}}/.venv
- .venv/bin/python3 -m pip install --upgrade pip setuptools wheel
- .venv/bin/python3 -m pip install --upgrade --requirement {{.ANSIBLE_DIR}}/requirements.txt
- .venv/bin/ansible-galaxy install --role-file "{{.ANSIBLE_DIR}}/requirements.yaml" --force
sources:
- "{{.ANSIBLE_DIR}}/requirements.txt"
- "{{.ANSIBLE_DIR}}/requirements.yaml"
generates:
- "{{.ROOT_DIR}}/.venv/pyvenv.cfg"

@@ -0,0 +1,104 @@
---
version: "3"
x-task-vars: &task-vars
node: "{{.node}}"
ceph_disk: "{{.ceph_disk}}"
ts: "{{.ts}}"
jobName: "{{.jobName}}"
vars:
waitForJobScript: "../_scripts/wait-for-k8s-job.sh"
ts: '{{now | date "150405"}}'
tasks:
wipe-node-aule:
desc: Trigger a wipe of Rook-Ceph data on node "aule"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460833"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: aule
wipe-node-orome:
desc: Trigger a wipe of Rook-Ceph data on node "orome"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37645333"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: orome
wipe-node-eonwe:
desc: Trigger a wipe of Rook-Ceph data on node "eonwe"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460887"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: eonwe
wipe-node-arlen:
desc: Trigger a wipe of Rook-Ceph data on node "arlen"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460897"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: arlen
wipe-disk:
desc: Wipe all remnants of rook-ceph from a given disk (ex. task rook:wipe-disk node=aule ceph_disk="/dev/nvme0n1")
silent: true
internal: true
cmds:
- envsubst < <(cat {{.wipeRookDiskJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} {{.wipeCephDiskJobName}} default
- kubectl -n default wait job/{{.wipeCephDiskJobName}} --for condition=complete --timeout=1m
- kubectl -n default logs job/{{.wipeCephDiskJobName}} --container list
- kubectl -n default delete job {{.wipeCephDiskJobName}}
vars:
node: '{{ or .node (fail "`node` is required") }}'
ceph_disk: '{{ or .ceph_disk (fail "`ceph_disk` is required") }}'
jobName: 'wipe-disk-{{- .node -}}-{{- .ceph_disk | replace "/" "-" -}}-{{- .ts -}}'
wipeRookDiskJobTemplate: "WipeDiskJob.tmpl.yaml"
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.wipeRookDiskJobTemplate}}
wipe-data:
desc: Wipe all remnants of Rook-Ceph data from a given node (ex. task rook:wipe-data node=aule)
silent: true
internal: true
cmds:
- envsubst < <(cat {{.wipeRookDataJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} {{.wipeRookDataJobName}} default
- kubectl -n default wait job/{{.wipeRookDataJobName}} --for condition=complete --timeout=1m
- kubectl -n default logs job/{{.wipeRookDataJobName}} --container list
- kubectl -n default delete job {{.wipeRookDataJobName}}
vars:
node: '{{ or .node (fail "`node` is required") }}'
jobName: "wipe-rook-data-{{- .node -}}-{{- .ts -}}"
wipeRookDataJobTemplate: "WipeRookDataJob.tmpl.yaml"
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.wipeRookDataJobTemplate}}
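
The wait-for-k8s-job.sh helper referenced above is not part of this diff; a minimal sketch of such a script, assuming it takes a job name plus namespace and blocks until the Job object appears:

#!/usr/bin/env bash
# illustrative sketch, not the repository's actual script
JOB="$1"
NS="${2:-default}"
until kubectl --namespace "$NS" get job "$JOB" >/dev/null 2>&1; do
  echo "waiting for job/$JOB in namespace $NS..."
  sleep 2
done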

@@ -0,0 +1,26 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "${jobName}"
namespace: "default"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: Never
nodeName: ${node}
containers:
- name: disk-wipe
image: docker.io/library/alpine:3.20.0
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted;
sgdisk --zap-all ${ceph_disk};
blkdiscard ${ceph_disk};
dd if=/dev/zero bs=1M count=10000 oflag=direct of=${ceph_disk};
partprobe ${ceph_disk};
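
The ${...} placeholders are filled in by envsubst from the calling task's environment; rendering the template by hand works the same way (the values below are examples):

# envsubst substitutes only variables present in the environment
export node=aule ceph_disk=/dev/disk/by-id/scsi-0HC_Volume_37460833 jobName=wipe-disk-example
envsubst < WipeDiskJob.tmpl.yaml | kubectl apply --filename -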

@@ -0,0 +1,29 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "${jobName}"
namespace: "default"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: Never
nodeName: ${node}
containers:
- name: disk-wipe
image: docker.io/library/alpine:3.20.0
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- rm -rf /mnt/host_var/lib/rook
volumeMounts:
- mountPath: /mnt/host_var
name: host-var
volumes:
- name: host-var
hostPath:
path: /var

@@ -0,0 +1,19 @@
apiVersion: v1
kind: Pod
metadata:
name: my-pod
spec:
containers:
- name: disk-wipe
image: docker.io/library/alpine:3.20.0
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted e2fsprogs;
sgdisk --zap-all /dev/nvme1n1;
blkdiscard /dev/nvme1n1;
dd if=/dev/zero bs=1M count=10000 oflag=direct of=/dev/nvme1n1;
sgdisk --zap-all /dev/nvme1n1;
partprobe /dev/nvme1n1;
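
This standalone pod is meant for a one-off manual wipe; it pins no nodeName, so add spec.nodeName when the target disk exists on only one node. A usage sketch (the manifest filename is assumed):

kubectl apply --filename wipe-pod.yaml
kubectl logs --follow pod/my-pod
kubectl delete pod my-pod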

@@ -1,29 +0,0 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app jellyfin
namespace: flux-system
spec:
targetNamespace: anime
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: node-feature-discovery
- name: nvidia-device-plugin
- name: volsync
path: ./kubernetes/apps/anime/jellyfin/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: false
interval: 30m
timeout: 5m
postBuild:
substitute:
APP: *app
GATUS_PATH: /web/index.html
VOLSYNC_CAPACITY: 20Gi

@@ -1,20 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: nicehash
spec:
refreshInterval: 1m
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: nicehash-secret
template:
type: Opaque
data:
MINING_ADDRESS: "{{ .MINING_ADDRESS }}"
dataFrom:
- extract:
key: nicehash

@@ -1,72 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: nicehash
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.5.1
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
controllers:
nicehash:
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: docker.io/dockerhubnh/nicehash
tag: latest
envFrom:
- secretRef:
name: nicehash-secret
env:
TZ: America/Chicago
MINING_WORKER_NAME: shadowfax
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities: { drop: ["ALL"] }
resources:
requests:
cpu: 10m
limits:
nvidia.com/gpu: 1 # requesting 1 GPU
memory: 10Gi
defaultPodOptions:
securityContext:
runAsNonRoot: true
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
seccompProfile: { type: RuntimeDefault }
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
persistence:
logs:
type: emptyDir
globalMounts:
- path: /var/log/
tmp:
type: emptyDir
cache:
existingClaim: nicehash
globalMounts:
- path: /var/cache/nhm4/

@@ -1,27 +0,0 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app nicehash
namespace: flux-system
spec:
targetNamespace: default
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: rook-ceph-cluster
path: ./kubernetes/apps/default/nicehash/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: false
interval: 30m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 1Gi

@@ -1,34 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: piped
spec:
refreshInterval: 1m
secretStoreRef:
name: crunchy-pgo-secrets
kind: ClusterSecretStore
target:
name: piped-secret
template:
type: Opaque
data:
config.properties: |
API_URL: https://piped-api.hsn.dev
COMPROMISED_PASSWORD_CHECK: true
DISABLE_REGISTRATION: true
FEED_RETENTION: 30
FRONTEND_URL: https://piped.hsn.dev
HTTP_WORKERS: 4
MATRIX_SERVER: https://element.infosec.exchange
PORT: 8080
PROXY_PART: https://piped-proxy.jahanson.tech
SENTRY_DSN:
hibernate.connection.driver_class: org.postgresql.Driver
hibernate.connection.url: jdbc:postgresql://{{ index . "host" }}:5432/{{ index . "dbname" }}
hibernate.connection.username: {{ index . "user" }}
hibernate.connection.password: {{ index . "password" }}
dataFrom:
- extract:
key: postgres-pguser-piped

@@ -1,182 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: piped
spec:
chart:
spec:
chart: app-template
version: 3.5.1
interval: 30m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
interval: 30m
values:
defaultPodOptions:
automountServiceAccountToken: false
securityContext:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
fsGroupChangePolicy: "OnRootMismatch"
controllers:
backend:
strategy: RollingUpdate
annotations:
secret.reloader.stakater.com/reload: piped-secret
containers:
app:
image:
repository: 1337kavin/piped
tag: latest@sha256:18e77857414236edc7245bebb3fb8ab3ac49c44bd76701bfce24f6ba0170d4b8
probes:
liveness:
enabled: true
readiness:
enabled: true
resources:
requests:
cpu: 10m
memory: 500Mi
limits:
memory: 2000Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
frontend:
strategy: RollingUpdate
containers:
app:
image:
repository: ghcr.io/bjw-s-labs/piped-frontend
tag: 2024.11.4@sha256:0e413986606f39cdc6afa0379feca912d4a4abbdcbe67b408c9fbe19fbabd10f
env:
BACKEND_HOSTNAME: piped-api.hsn.dev
probes:
liveness:
enabled: true
readiness:
enabled: true
resources:
requests:
cpu: 10m
memory: 32Mi
limits:
memory: 256Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
ytproxy:
strategy: RollingUpdate
containers:
app:
image:
repository: 1337kavin/piped-proxy
tag: latest@sha256:ab9e472107337886d71b0151b6e777fc4cba0dd8251a21d4788a7a7f165f545a
command:
- /app/piped-proxy
probes:
liveness:
enabled: true
readiness:
enabled: true
resources:
requests:
cpu: 10m
memory: 500Mi
limits:
memory: 2000Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
service:
backend:
controller: backend
ports:
http:
port: 8080
frontend:
controller: frontend
ports:
http:
port: 8080
ytproxy:
controller: ytproxy
ports:
http:
port: 8080
ingress:
backend:
className: "external-nginx"
annotations:
external-dns.alpha.kubernetes.io/target: external.hsn.dev
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
nginx.ingress.kubernetes.io/enable-cors: "true"
nginx.ingress.kubernetes.io/cors-allow-origin: "https://piped.hsn.dev, https://piped-api.hsn.dev, https://piped-proxy.jahanson.tech"
hosts:
- host: piped-api.hsn.dev
paths:
- path: /
service:
identifier: backend
port: http
frontend:
className: "external-nginx"
annotations:
external-dns.alpha.kubernetes.io/target: external.hsn.dev
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
nginx.ingress.kubernetes.io/enable-cors: "true"
nginx.ingress.kubernetes.io/cors-allow-origin: "https://piped.hsn.dev, https://piped-api.hsn.dev, https://piped-proxy.jahanson.tech"
hosts:
- host: piped.hsn.dev
paths:
- path: /
service:
identifier: frontend
port: http
ytproxy:
className: "internal-nginx"
annotations:
nginx.ingress.kubernetes.io/enable-cors: "true"
nginx.ingress.kubernetes.io/cors-allow-origin: "https://piped.hsn.dev, https://piped-api.hsn.dev, https://piped-proxy.jahanson.tech"
hosts:
- host: piped-proxy.jahanson.tech
paths:
- path: /
service:
identifier: ytproxy
port: http
persistence:
config:
type: secret
name: piped-secret
advancedMounts:
backend:
app:
- path: /app/config.properties
subPath: config.properties
readOnly: true

@@ -1,134 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: plex
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.5.1
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
dependsOn:
- name: nvidia-device-plugin
namespace: kube-system
- name: rook-ceph-cluster
namespace: rook-ceph
- name: volsync
namespace: volsync-system
values:
controllers:
plex:
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: ghcr.io/onedr0p/plex
tag: 1.41.2.9200-c6bbc1b53@sha256:47c6f3d85f4e739210860934a0bb24126170fa2f6a602fb909467f17a035c311
env:
TZ: America/Chicago
PLEX_ADVERTISE_URL: https://plex.hsn.dev:443,http://10.1.1.39:32400
PLEX_NO_AUTH_NETWORKS: 10.1.1.0/24,10.244.0.0/16
probes:
liveness: &probes
enabled: true
custom: true
spec:
httpGet:
path: /identity
port: 32400
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
readiness: *probes
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 10
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities: { drop: ["ALL"] }
resources:
requests:
cpu: 100m
limits:
nvidia.com/gpu: 1 # requesting 1 GPU
memory: 16Gi
defaultPodOptions:
securityContext:
runAsNonRoot: true
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups: [44, 10000]
seccompProfile: { type: RuntimeDefault }
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
service:
app:
controller: plex
type: LoadBalancer
annotations:
io.cilium/lb-ipam-ips: 10.1.1.39
ports:
http:
port: 32400
ingress:
app:
annotations:
external-dns.alpha.kubernetes.io/target: external.hsn.dev
nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
className: external-nginx
hosts:
- host: "{{ .Release.Name }}.hsn.dev"
paths:
- path: /
service:
identifier: app
port: http
persistence:
config:
existingClaim: plex
# TODO: If setting up Plex for the first time, you'll want to add the globalMounts section
globalMounts:
- path: /config/Library/Application Support/Plex Media Server
# Separate PVC for cache to avoid backing up cache files
cache:
existingClaim: plex-cache
globalMounts:
- path: /config/Library/Application Support/Plex Media Server/Cache
logs:
type: emptyDir
globalMounts:
- path: /config/Library/Application Support/Plex Media Server/Logs
tmp:
type: emptyDir
transcode:
type: emptyDir
media:
type: nfs
server: 10.1.1.61
path: /moria/media
globalMounts:
- path: /media
readOnly: true

@@ -1,11 +0,0 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: plex-cache
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 75Gi
storageClassName: ceph-block

@@ -1,93 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: plex-image-cleanup
spec:
chart:
spec:
chart: app-template
version: 3.5.1
interval: 30m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
interval: 30m
values:
controllers:
kometa-image-maid:
type: cronjob
annotations:
reloader.stakater.com/auto: "true"
cronjob:
schedule: "30 8 * * 6"
pod:
affinity:
podAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- plex
topologyKey: kubernetes.io/hostname
securityContext:
runAsUser: 568
runAsGroup: 568
runAsNonRoot: true
containers:
app:
image:
repository: docker.io/kometateam/imagemaid
tag: v1.1.1
env:
PLEX_URL: http://plex.default.svc.cluster.local:32400
PLEX_TOKEN:
valueFrom:
secretKeyRef:
name: kometa-image-maid-secret
key: PLEX_TOKEN
PLEX_PATH: /data/plex_config/Library/Application Support/Plex Media Server
MODE: remove
PHOTO_TRANSCODER: true
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
requests:
cpu: 25m
memory: 128Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
persistence:
config:
type: emptyDir
advancedMounts:
kometa-image-maid:
app:
- path: /config
plex-config:
existingClaim: plex
advancedMounts:
kometa-image-maid:
app:
- path: /data/plex_config/Library/Application Support/Plex Media Server/
plex-cache:
existingClaim: plex-cache
advancedMounts:
kometa-image-maid:
app:
- path: /data/plex_config/Library/Application Support/Plex Media Server/Cache

@@ -1,55 +0,0 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app plex
namespace: flux-system
spec:
targetNamespace: default
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/default/plex/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: true
dependsOn:
- name: rook-ceph-cluster
- name: volsync
- name: external-secrets-stores
interval: 30m
timeout: 5m
postBuild:
substitute:
APP: *app
GATUS_PATH: /web/index.html
VOLSYNC_CAPACITY: 30Gi
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app kometa-image-maid
namespace: flux-system
spec:
targetNamespace: default
commonMetadata:
labels:
app.kubernetes.io/name: *app
interval: 30m
timeout: 5m
path: "./kubernetes/apps/default/plex/kometa-image-maid"
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: false
dependsOn:
- name: external-secrets-stores
- name: plex
postBuild:
substitute:
APP: *app

@@ -1,120 +0,0 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app scrypted
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.5.1
interval: 30m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
values:
controllers:
scrypted:
annotations:
reloader.stakater.com/auto: "true"
pod:
nodeSelector:
google.feature.node.kubernetes.io/coral: "true"
nvidia.com/gpu.present: "true"
securityContext:
supplementalGroups:
- 568
containers:
app:
image:
repository: ghcr.io/koush/scrypted
tag: v0.123.31-jammy-nvidia
probes:
liveness:
enabled: true
readiness:
enabled: true
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
resources:
requests:
cpu: 136m
memory: 1024Mi
limits:
nvidia.com/gpu: 1
memory: 8192Mi
securityContext:
privileged: true
service:
app:
controller: *app
type: LoadBalancer
annotations:
io.cilium/lb-ipam-ips: 10.1.1.33
nameOverride: *app
ports:
http:
port: 11080
primary: true
rebroadcast1: # driveway
port: 39655
rebroadcast2: # sideyard
port: 46561
rebroadcast3: # doorbell
port: 44759
homekit: # homekit
port: 42010
homekit-bridge: # bridge
port: 33961
ingress:
app:
className: "internal-nginx"
annotations:
hosts:
- host: &host scrypted.jahanson.tech
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts:
- *host
persistence:
config:
existingClaim: scrypted
advancedMounts:
scrypted:
app:
- path: /server/volume
cache:
type: emptyDir
globalMounts:
- path: /.cache
cache-npm:
type: emptyDir
globalMounts:
- path: /.npm
dev-bus-usb:
type: hostPath
hostPath: /dev/bus/usb
hostPathType: Directory
sys-bus-usb:
type: hostPath
hostPath: /sys/bus/usb
hostPathType: Directory
recordings:
type: nfs
server: shadowfax.jahanson.tech
path: /nahar/scrypted
globalMounts:
- path: /recordings

@@ -1,30 +0,0 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &appname scrypted
namespace: flux-system
spec:
targetNamespace: default
commonMetadata:
labels:
app.kubernetes.io/name: *appname
interval: 30m
timeout: 5m
path: "./kubernetes/apps/default/scrypted/app"
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: false
dependsOn:
- name: rook-ceph-cluster
- name: volsync
- name: external-secrets-stores
postBuild:
substitute:
APP: *appname
APP_UID: "0"
APP_GID: "0"
VOLSYNC_CAPACITY: 5Gi

@@ -1,15 +1,16 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: &app jellyfin
name: jellyfin
namespace: default
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.5.1
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
@@ -20,31 +21,26 @@ spec:
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
dependsOn:
- name: nvidia-device-plugin
namespace: kube-system
- name: node-feature-discovery
namespace: kube-system
- name: rook-ceph-cluster
namespace: rook-ceph
- name: volsync
namespace: volsync-system
strategy: rollback
values:
controllers:
jellyfin:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: ghcr.io/jellyfin/jellyfin
tag: 10.10.3@sha256:17c3a8d9dddb97789b5f37112840ebf96566442c14d4754193a6c2eb154bc221
repository: jellyfin/jellyfin
tag: 10.8.13
env:
NVIDIA_VISIBLE_DEVICES: "all"
NVIDIA_DRIVER_CAPABILITIES: "compute,video,utility"
DOTNET_SYSTEM_IO_DISABLEFILELOCKING: "true"
JELLYFIN_FFmpeg__probesize: 50000000
JELLYFIN_FFmpeg__analyzeduration: 50000000
JELLYFIN_PublishedServerUrl: jelly.hsn.dev
TZ: America/Chicago
probes:
liveness: &probes
@@ -63,76 +59,58 @@
enabled: false
resources:
requests:
cpu: 100m
limits:
nvidia.com/gpu: 1 # requesting 1 GPU
cpu: 100m
memory: 512Mi
limits:
nvidia.com/gpu: 1
memory: 4Gi
defaultPodOptions:
securityContext:
runAsNonRoot: true
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups: [44, 10000]
seccompProfile: { type: RuntimeDefault }
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
pod:
runtimeClassName: nvidia
enableServiceLinks: false
nodeSelector:
nvidia.com/gpu.present: "true"
securityContext:
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups: [44, 105, 10000]
service:
app:
controller: *app
type: LoadBalancer
annotations:
io.cilium/lb-ipam-ips: 10.1.1.40
controller: jellyfin
ports:
http:
port: *port
ingress:
app:
annotations:
external-dns.alpha.kubernetes.io/target: external.hsn.dev
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
enabled: true
className: external-nginx
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
external-dns.alpha.kubernetes.io/target: external.hsn.dev
hosts:
- host: "{{ .Release.Name }}.hsn.dev"
- host: &host "jelly.hsn.dev"
paths:
- path: /
service:
identifier: app
port: *port
internal:
className: internal-nginx
hosts:
- host: &host "{{ .Release.Name }}.jahanson.tech"
paths:
- path: /
service:
identifier: app
port: *port
port: http
tls:
- hosts:
- *host
persistence:
config:
existingClaim: jellyfin
enabled: true
existingClaim: *app
globalMounts:
- path: /config
media:
type: nfs
server: shadowfax.jahanson.tech
path: /moria/media
globalMounts:
- path: /media
readOnly: true
transcode:
enabled: true
type: emptyDir
globalMounts:
- path: /transcode
cache:
media:
enabled: true
type: emptyDir
type: nfs
server: 10.1.1.12
path: /mnt/users/Media
globalMounts:
- path: /cache
- path: /media

@@ -2,7 +2,7 @@
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: default
resources:
- ./gatus.yaml
- ./helmrelease.yaml
- ../../../../templates/volsync

@@ -3,23 +3,21 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app omegabrr
name: &app jellyfin
namespace: flux-system
spec:
targetNamespace: default
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
path: ./kubernetes/apps/default/omegabrr/app
path: ./kubernetes/apps/default/jellyfin/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 10Gi

@@ -10,10 +10,17 @@ spec:
name: onepassword-connect
target:
name: home-assistant-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
HASS_PIRATE_WEATHER_API_KEY: "{{ .HASS_PIRATE_WEATHER_API_KEY }}"
HASS_ELEVATION: "{{ .hass_elevation }}"
HASS_LATITUDE: "{{ .hass_latitude }}"
HASS_LONGITUDE: "{{ .hass_longitude }}"
dataFrom:
- extract:
key: home-assistant
rewrite:
- regexp:
source: "(.*)"
target: "hass_$1"

@@ -0,0 +1,90 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: home-assistant
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
controllers:
home-assistant:
annotations:
reloader.stakater.com/auto: "true"
pod:
annotations:
k8s.v1.cni.cncf.io/networks: |
[{
"name":"multus-iot",
"namespace": "kube-system",
"ips": ["10.1.3.151/24"]
}]
securityContext:
runAsUser: 568
runAsGroup: 568
runAsNonRoot: true
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
containers:
app:
image:
repository: ghcr.io/home-assistant/home-assistant
tag: 2024.5.5
env:
TZ: America/Chicago
HASS_HTTP_TRUSTED_PROXY_1: 10.244.0.0/16
envFrom:
- secretRef:
name: home-assistant-secret
resources:
requests:
cpu: 10m
limits:
memory: 1Gi
service:
app:
controller: home-assistant
ports:
http:
port: 8123
ingress:
app:
className: internal-nginx
hosts:
- host: &host hass.jahanson.tech
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts: [*host]
persistence:
config:
existingClaim: home-assistant
logs:
type: emptyDir
globalMounts:
- path: /config/logs
tts:
type: emptyDir
globalMounts:
- path: /config/tts
tmp:
type: emptyDir
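
The k8s.v1.cni.cncf.io/networks annotation asks Multus for a second interface on the IoT VLAN with a static address. A verification sketch; the generated workload name and the presence of iproute2 in the image are assumptions, and net1 is Multus' conventional name for the first attached interface:

kubectl --namespace home-automation exec deploy/home-assistant -- ip -brief addr show net1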

@@ -6,22 +6,24 @@ metadata:
name: &app home-assistant
namespace: flux-system
spec:
targetNamespace: default
targetNamespace: home-automation
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
path: ./kubernetes/apps/default/home-assistant/app
- name: openebs-system
- name: volsync
path: ./kubernetes/apps/home-automation/home-assistant/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
GATUS_SUBDOMAIN: hass
VOLSYNC_CAPACITY: 5Gi

@@ -6,6 +6,4 @@ resources:
# Pre Flux-Kustomizations
- ./namespace.yaml
# Flux-Kustomizations
- ./jellyseerr/ks.yaml # sqlite
- ./radarr/ks.yaml # postgres
- ./sonarr/ks.yaml # postgres
- ./mosquitto/ks.yaml

@@ -0,0 +1,107 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: &app matter-server
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 3.2.1
interval: 15m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
maxHistory: 3
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
controllers:
matter-server:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
pod:
annotations:
k8s.v1.cni.cncf.io/networks: |
[{
"name":"multus-iot",
"namespace": "kube-system",
"ips": ["10.1.3.152/24"]
}]
securityContext:
runAsUser: 568
runAsGroup: 568
runAsNonRoot: true
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
containers:
app:
image:
repository: ghcr.io/home-assistant-libs/python-matter-server
tag: 6.0.1
pullPolicy: IfNotPresent
env:
TZ: "America/Chicago"
MATTER_SERVER__INSTANCE_NAME: Matter-Server
MATTER_SERVER__PORT: &port 5580
MATTER_SERVER__APPLICATION_URL: &host matter.jahanson.tech
MATTER_SERVER__LOG_LEVEL: info
probes:
liveness:
enabled: true
readiness:
enabled: true
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
resources:
requests:
memory: "100M"
limits:
memory: "500M"
service:
app:
controller: *app
type: LoadBalancer
annotations:
io.cilium/lb-ipam-ips: "10.1.1.37"
ports:
api:
enabled: true
primary: true
protocol: TCP
port: *port
externalTrafficPolicy: Cluster
persistence:
config:
enabled: true
existingClaim: matter-server
advancedMounts:
matter-server:
app:
- path: "/data"
ingress:
app:
className: internal-nginx
hosts:
- host: *host
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts: [*host]
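
Home Assistant's Matter integration connects to this server over WebSocket, i.e. ws://matter.jahanson.tech:5580/ws with the values above (the /ws path is the python-matter-server convention). A reachability sketch from inside the cluster:

# throwaway pod; alpine's nc supports -z (scan only) and -v (verbose)
kubectl run nettest --rm -it --image=alpine:3.20 --restart=Never -- nc -zv 10.1.1.37 5580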

@@ -3,25 +3,25 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app plex-trakt-sync
name: &app matter-server
namespace: flux-system
spec:
targetNamespace: default
targetNamespace: home-automation
commonMetadata:
labels:
app.kubernetes.io/name: *app
interval: 30m
timeout: 5m
path: "./kubernetes/apps/default/plex/trakt-sync"
dependsOn:
- name: openebs-system
- name: volsync
path: ./kubernetes/apps/home-automation/matter-server/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: true
dependsOn:
- name: rook-ceph-cluster
- name: volsync
- name: external-secrets-stores
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app

@@ -0,0 +1,9 @@
per_listener_settings false
listener 1883
allow_anonymous false
persistence true
persistence_location /data
connection_messages false
autosave_interval 60
password_file /mosquitto/external_config/mosquitto_pwd
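
mosquitto expects password_file to contain hashed entries, so the init container in the HelmRelease below converts the plaintext user:password lines rendered by the ExternalSecret before the broker starts, equivalent to:

# -U upgrades a plaintext user:password file to hashed entries, in place
mosquitto_passwd -U /mosquitto/external_config/mosquitto_pwd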

@@ -0,0 +1,27 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: mosquitto
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: mosquitto-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
mosquitto_pwd: |
{{ .mosquitto_username }}:{{ .mosquitto_password }}
{{ .mosquitto_zwave_username }}:{{ .mosquitto_zwave_password }}
{{ .mosquitto_home_assistant_username }}:{{ .mosquitto_home_assistant_password }}
dataFrom:
- extract:
key: mosquitto
rewrite:
- regexp:
source: "(.*)"
target: "mosquitto_$1"

@@ -0,0 +1,105 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app mosquitto
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.2.1
interval: 30m
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
values:
controllers:
mosquitto:
annotations:
reloader.stakater.com/auto: "true"
pod:
securityContext:
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
initContainers:
init-config:
image:
repository: public.ecr.aws/docker/library/eclipse-mosquitto
tag: 2.0.18
command:
- "/bin/sh"
- "-c"
args:
- cp /tmp/secret/* /mosquitto/external_config/;
mosquitto_passwd -U /mosquitto/external_config/mosquitto_pwd;
chmod 0600 /mosquitto/external_config/mosquitto_pwd;
containers:
app:
image:
repository: public.ecr.aws/docker/library/eclipse-mosquitto
tag: 2.0.18
probes:
liveness:
enabled: true
readiness:
enabled: true
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
resources:
requests:
cpu: 5m
memory: 10M
limits:
memory: 10M
service:
app:
controller: mosquitto
type: LoadBalancer
annotations:
external-dns.alpha.kubernetes.io/hostname: "mqtt.jahanson.tech"
io.cilium/lb-ipam-ips: "10.1.1.36"
externalTrafficPolicy: Local
ports:
mqtt:
enabled: true
port: 1883
persistence:
data:
existingClaim: *app
advancedMounts:
mosquitto:
app:
- path: /data
mosquitto-configfile:
type: configMap
name: mosquitto-configmap
advancedMounts:
mosquitto:
app:
- path: /mosquitto/config/mosquitto.conf
subPath: mosquitto.conf
mosquitto-secret:
type: secret
name: mosquitto-secret
advancedMounts:
mosquitto:
init-config:
- path: /tmp/secret
mosquitto-externalconfig:
type: emptyDir
globalMounts:
- path: /mosquitto/external_config
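
A client-side smoke test against the broker; the credentials are placeholders for one of the user:password pairs rendered by the ExternalSecret above:

mosquitto_sub -h mqtt.jahanson.tech -p 1883 -u home-assistant -P 'example-password' -t '#' -v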

@@ -3,9 +3,12 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./pvc.yaml
- ./helmrelease.yaml
- ../../../../templates/gatus/external
- ./externalsecret.yaml
- ../../../../templates/volsync
configMapGenerator:
- name: mosquitto-configmap
files:
- config/mosquitto.conf
generatorOptions:
disableNameSuffixHash: true

@@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &appname mosquitto
namespace: flux-system
spec:
targetNamespace: home-automation
commonMetadata:
labels:
app.kubernetes.io/name: *appname
interval: 10m
path: "./kubernetes/apps/home-automation/mosquitto/app"
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: true
dependsOn:
- name: openebs
- name: volsync
- name: external-secrets-stores
postBuild:
substitute:
APP: *appname
VOLSYNC_CLAIM: mosquitto-data
VOLSYNC_CAPACITY: 512Mi

@@ -2,8 +2,7 @@
apiVersion: v1
kind: Namespace
metadata:
name: coder
name: home-automation
labels:
kustomize.toolkit.fluxcd.io/prune: disabled
volsync.backube/privileged-movers: "true"
pgo-enabled-hsn.dev: "true"

@@ -0,0 +1,588 @@
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.14.0
creationTimestamp: null
name: ciliumbgppeeringpolicies.cilium.io
spec:
group: cilium.io
names:
categories:
- cilium
- ciliumbgp
kind: CiliumBGPPeeringPolicy
listKind: CiliumBGPPeeringPolicyList
plural: ciliumbgppeeringpolicies
shortNames:
- bgpp
singular: ciliumbgppeeringpolicy
scope: Cluster
versions:
- additionalPrinterColumns:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v2alpha1
schema:
openAPIV3Schema:
description: CiliumBGPPeeringPolicy is a Kubernetes third-party resource for
instructing Cilium's BGP control plane to create virtual BGP routers.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: Spec is a human readable description of a BGP peering policy
properties:
nodeSelector:
description: "NodeSelector selects a group of nodes where this BGP
Peering Policy applies. \n If empty / nil this policy applies to
all nodes."
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the value from the
MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
virtualRouters:
description: A list of CiliumBGPVirtualRouter(s) which instructs the
BGP control plane how to instantiate virtual BGP routers.
items:
description: CiliumBGPVirtualRouter defines a discrete BGP virtual
router configuration.
properties:
exportPodCIDR:
default: false
description: ExportPodCIDR determines whether to export the
Node's private CIDR block to the configured neighbors.
type: boolean
localASN:
description: LocalASN is the ASN of this virtual router. Supports
extended 32bit ASNs
format: int64
maximum: 4294967295
minimum: 0
type: integer
neighbors:
description: Neighbors is a list of neighboring BGP peers for
this virtual router
items:
description: CiliumBGPNeighbor is a neighboring peer for use
in a CiliumBGPVirtualRouter configuration.
properties:
advertisedPathAttributes:
description: AdvertisedPathAttributes can be used to apply
additional path attributes to selected routes when advertising
them to the peer. If empty / nil, no additional path
attributes are advertised.
items:
description: CiliumBGPPathAttributes can be used to
apply additional path attributes to matched routes
when advertising them to a BGP peer.
properties:
communities:
description: Communities defines a set of community
values advertised in the supported BGP Communities
path attributes. If nil / not set, no BGP Communities
path attribute will be advertised.
properties:
large:
description: Large holds a list of the BGP Large
Communities Attribute (RFC 8092) values.
items:
description: BGPLargeCommunity type represents
a value of the BGP Large Communities Attribute
(RFC 8092), as three 4-byte decimal numbers
separated by colons.
pattern: ^([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5]):([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5]):([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5])$
type: string
type: array
standard:
description: Standard holds a list of "standard"
32-bit BGP Communities Attribute (RFC 1997)
values defined as numeric values.
items:
description: BGPStandardCommunity type represents
a value of the "standard" 32-bit BGP Communities
Attribute (RFC 1997) as a 4-byte decimal
number or two 2-byte decimal numbers separated
by a colon (<0-65535>:<0-65535>). For example,
no-export community value is 65553:65281.
pattern: ^([0-9]|[1-9][0-9]{1,8}|[1-3][0-9]{9}|4[01][0-9]{8}|42[0-8][0-9]{7}|429[0-3][0-9]{6}|4294[0-8][0-9]{5}|42949[0-5][0-9]{4}|429496[0-6][0-9]{3}|4294967[01][0-9]{2}|42949672[0-8][0-9]|429496729[0-5])$|^([0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]):([0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])$
type: string
type: array
wellKnown:
description: WellKnown holds a list "standard"
32-bit BGP Communities Attribute (RFC 1997)
values defined as well-known string aliases
to their numeric values.
items:
description: "BGPWellKnownCommunity type represents
a value of the \"standard\" 32-bit BGP Communities
Attribute (RFC 1997) as a well-known string
alias to its numeric value. Allowed values
and their mapping to the numeric values:
\n internet = 0x00000000
(0:0) planned-shut = 0xffff0000
(65535:0) accept-own = 0xffff0001
(65535:1) route-filter-translated-v4 = 0xffff0002
(65535:2) route-filter-v4 = 0xffff0003
(65535:3) route-filter-translated-v6 = 0xffff0004
(65535:4) route-filter-v6 = 0xffff0005
(65535:5) llgr-stale = 0xffff0006
(65535:6) no-llgr = 0xffff0007
(65535:7) blackhole = 0xffff029a
(65535:666) no-export =
0xffffff01\t(65535:65281) no-advertise =
0xffffff02 (65535:65282) no-export-subconfed
\ = 0xffffff03 (65535:65283) no-peer
\ = 0xffffff04 (65535:65284)"
enum:
- internet
- planned-shut
- accept-own
- route-filter-translated-v4
- route-filter-v4
- route-filter-translated-v6
- route-filter-v6
- llgr-stale
- no-llgr
- blackhole
- no-export
- no-advertise
- no-export-subconfed
- no-peer
type: string
type: array
type: object
localPreference:
description: LocalPreference defines the preference
value advertised in the BGP Local Preference path
attribute. As Local Preference is only valid for
iBGP peers, this value will be ignored for eBGP
peers (no Local Preference path attribute will
be advertised). If nil / not set, the default
Local Preference of 100 will be advertised in
the Local Preference path attribute for iBGP peers.
format: int64
maximum: 4294967295
minimum: 0
type: integer
selector:
description: Selector selects a group of objects
of the SelectorType resulting into routes that
will be announced with the configured Attributes.
If nil / not set, all objects of the SelectorType
are selected.
properties:
matchExpressions:
description: matchExpressions is a list of label
selector requirements. The requirements are
ANDed.
items:
description: A label selector requirement
is a selector that contains values, a key,
and an operator that relates the key and
values.
properties:
key:
description: key is the label key that
the selector applies to.
type: string
operator:
description: operator represents a key's
relationship to a set of values. Valid
operators are In, NotIn, Exists and
DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string
values. If the operator is In or NotIn,
the values array must be non-empty.
If the operator is Exists or DoesNotExist,
the values array must be empty. This
array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the
value from the MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value}
pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions,
whose key field is "key", the operator is
"In", and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
selectorType:
description: 'SelectorType defines the object type
on which the Selector applies: - For "PodCIDR"
the Selector matches k8s CiliumNode resources
(path attributes apply to routes announced for
PodCIDRs of selected CiliumNodes. Only affects
routes of cluster scope / Kubernetes IPAM CIDRs,
not Multi-Pool IPAM CIDRs. - For "CiliumLoadBalancerIPPool"
the Selector matches CiliumLoadBalancerIPPool
custom resources (path attributes apply to routes
announced for selected CiliumLoadBalancerIPPools).
- For "CiliumPodIPPool" the Selector matches CiliumPodIPPool
custom resources (path attributes apply to routes
announced for allocated CIDRs of selected CiliumPodIPPools).'
enum:
- PodCIDR
- CiliumLoadBalancerIPPool
- CiliumPodIPPool
type: string
required:
- selectorType
type: object
type: array
authSecretRef:
description: AuthSecretRef is the name of the secret to
use to fetch a TCP authentication password for this
peer.
type: string
connectRetryTimeSeconds:
default: 120
description: ConnectRetryTimeSeconds defines the initial
value for the BGP ConnectRetryTimer (RFC 4271, Section
8).
format: int32
maximum: 2147483647
minimum: 1
type: integer
eBGPMultihopTTL:
default: 1
description: EBGPMultihopTTL controls the multi-hop feature
for eBGP peers. Its value defines the Time To Live (TTL)
value used in BGP packets sent to the neighbor. The
value 1 implies that eBGP multi-hop feature is disabled
(only a single hop is allowed). This field is ignored
for iBGP peers.
format: int32
maximum: 255
minimum: 1
type: integer
families:
description: "Families, if provided, defines a set of
AFI/SAFIs the speaker will negotiate with it's peer.
\n If this slice is not provided the default families
of IPv6 and IPv4 will be provided."
items:
description: CiliumBGPFamily represents a AFI/SAFI address
family pair.
properties:
afi:
description: Afi is the Address Family Identifier
(AFI) of the family.
enum:
- ipv4
- ipv6
- l2vpn
- ls
- opaque
type: string
safi:
description: Safi is the Subsequent Address Family
Identifier (SAFI) of the family.
enum:
- unicast
- multicast
- mpls_label
- encapsulation
- vpls
- evpn
- ls
- sr_policy
- mup
- mpls_vpn
- mpls_vpn_multicast
- route_target_constraints
- flowspec_unicast
- flowspec_vpn
- key_value
type: string
required:
- afi
- safi
type: object
type: array
gracefulRestart:
description: GracefulRestart defines graceful restart
parameters which are negotiated with this neighbor.
If empty / nil, the graceful restart capability is disabled.
properties:
enabled:
description: Enabled flag, when set enables graceful
restart capability.
type: boolean
restartTimeSeconds:
default: 120
description: RestartTimeSeconds is the estimated time
it will take for the BGP session to be re-established
with peer after a restart. After this period, peer
will remove stale routes. This is described RFC
4724 section 4.2.
format: int32
maximum: 4095
minimum: 1
type: integer
required:
- enabled
type: object
holdTimeSeconds:
default: 90
description: HoldTimeSeconds defines the initial value
for the BGP HoldTimer (RFC 4271, Section 4.2). Updating
this value will cause a session reset.
format: int32
maximum: 65535
minimum: 3
type: integer
keepAliveTimeSeconds:
default: 30
description: KeepaliveTimeSeconds defines the initial
value for the BGP KeepaliveTimer (RFC 4271, Section
8). It can not be larger than HoldTimeSeconds. Updating
this value will cause a session reset.
format: int32
maximum: 65535
minimum: 1
type: integer
peerASN:
description: PeerASN is the ASN of the peer BGP router.
Supports extended 32bit ASNs
format: int64
maximum: 4294967295
minimum: 0
type: integer
peerAddress:
description: PeerAddress is the IP address of the peer.
This must be in CIDR notation and use a /32 to express
a single host.
format: cidr
type: string
peerPort:
default: 179
description: PeerPort is the TCP port of the peer. 1-65535
is the range of valid port numbers that can be specified.
If unset, defaults to 179.
format: int32
maximum: 65535
minimum: 1
type: integer
required:
- peerASN
- peerAddress
type: object
minItems: 1
type: array
podIPPoolSelector:
description: "PodIPPoolSelector selects CiliumPodIPPools based
on labels. The virtual router will announce allocated CIDRs
of matching CiliumPodIPPools. \n If empty / nil no CiliumPodIPPools
will be announced."
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the value from
the MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
serviceSelector:
description: "ServiceSelector selects a group of load balancer
services which this virtual router will announce. The loadBalancerClass
for a service must be nil or specify a class supported by
Cilium, e.g. \"io.cilium/bgp-control-plane\". Refer to the
following document for additional details regarding load balancer
classes: \n https://kubernetes.io/docs/concepts/services-networking/service/#load-balancer-class
\n If empty / nil no services will be announced."
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: A label selector requirement is a selector
that contains values, a key, and an operator that relates
the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: operator represents a key's relationship
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
enum:
- In
- NotIn
- Exists
- DoesNotExist
type: string
values:
description: values is an array of string values.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
description: MatchLabelsValue represents the value from
the MatchLabels {key,value} pair.
maxLength: 63
pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
type: string
description: matchLabels is a map of {key,value} pairs.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object
type: object
required:
- localASN
- neighbors
type: object
minItems: 1
type: array
required:
- virtualRouters
type: object
required:
- metadata
type: object
served: true
storage: true
subresources: {}
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

@@ -0,0 +1,36 @@
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeeringPolicy
# comments courtesy of JJGadgets
# MAKE SURE CRDs ARE INSTALLED IN CLUSTER VIA cilium-config ConfigMap OR Cilium HelmRelease/values.yaml (bgpControlPlane.enabled: true), BEFORE THIS IS APPLIED!
# "CiliumBGPPeeringPolicy" Custom Resource will replace the old MetalLB BGP's "bgp-config" ConfigMap
# "CiliumBGPPeeringPolicy" is used with `bgpControlPlane.enabled: true` which uses GoBGP, NOT the old `bgp.enabled: true` which uses MetalLB
metadata:
name: bgp-loadbalancer-ip-main
spec:
nodeSelector:
matchLabels:
kubernetes.io/os: "linux" # match all Linux nodes, change this to match more granularly if more than 1 PeeringPolicy is to be used throughout cluster
virtualRouters:
- localASN: 64512
exportPodCIDR: false
serviceSelector: # this replaces address-pools, instead of defining the range of IPs that can be assigned to LoadBalancer services, now services have to match below selectors for their LB IPs to be announced
matchExpressions:
- {
key: thisFakeSelector,
operator: NotIn,
values: ["will-match-and-announce-all-services"],
}
neighbors:
- peerAddress: "10.1.1.1/32" # unlike bgp-config ConfigMap, peerAddress needs to be in CIDR notation
peerASN: 64512
---
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumloadbalancerippool_v2alpha1.json
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: main-pool
spec:
cidrs:
- cidr: 10.45.0.1/24
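
Once applied, peering and address allocation can be checked; `cilium bgp peers` needs the Cilium CLI and the bgpControlPlane feature enabled (it is, in the HelmRelease below):

# BGP session state as seen by each agent
cilium bgp peers
# the pool, plus any LoadBalancer services that received an address from it
kubectl get ciliumloadbalancerippool main-pool
kubectl get svc --all-namespaces | grep LoadBalancer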

@@ -0,0 +1,78 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: cilium
namespace: kube-system
spec:
interval: 30m
chart:
spec:
chart: cilium
version: 1.15.3
sourceRef:
kind: HelmRepository
name: cilium
namespace: flux-system
maxHistory: 2
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
values:
cluster:
name: homelab
id: 1
hubble:
relay:
enabled: true
ui:
enabled: true
metrics:
enableOpenMetrics: true
prometheus:
enabled: true
operator:
prometheus:
enabled: true
ipam:
mode: kubernetes
kubeProxyReplacement: true
k8sServiceHost: 127.0.0.1
k8sServicePort: 7445
rollOutCiliumPods: true
cgroup:
automount:
enabled: false
hostRoot: /sys/fs/cgroup
bgp:
enabled: false
announce:
loadbalancerIP: true
podCIDR: false
bgpControlPlane:
enabled: true
securityContext:
capabilities:
ciliumAgent:
- CHOWN
- KILL
- NET_ADMIN
- NET_RAW
- IPC_LOCK
- SYS_ADMIN
- SYS_RESOURCE
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
cleanCiliumState:
- NET_ADMIN
- SYS_ADMIN
- SYS_RESOURCE
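
A post-deploy sanity check sketch with the Cilium CLI:

cilium status --wait
# kube-proxy replacement should be on, matching kubeProxyReplacement: true above
cilium config view | grep kube-proxy-replacement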

@@ -0,0 +1,23 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: allow-ssh
spec:
description: ""
nodeSelector:
matchLabels:
# node-access: ssh
node-role.kubernetes.io/control-plane: "true"
ingress:
- fromEntities:
- cluster
- toPorts:
- ports:
- port: "22"
protocol: TCP
- icmps:
- fields:
- type: 8
family: IPv4

View file

@ -0,0 +1,27 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: api-server
spec:
nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# load balancer -> api server
- fromCIDR:
- 167.235.217.82/32
toPorts:
- ports:
- port: '6443'
protocol: TCP
egress:
# api server -> kubelet
- toEntities:
- remote-node
toPorts:
- ports:
- port: '10250'
protocol: TCP

View file

@ -0,0 +1,41 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: cilium-health
specs:
- endpointSelector:
# apply to health endpoints
matchLabels:
'reserved:health': ''
ingress:
# cilium agent -> cilium agent
- fromEntities:
- host
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP
- nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# cilium agent -> cilium agent
- fromEntities:
- health
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP
egress:
# cilium agent -> cilium agent
- toEntities:
- health
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP

View file

@ -0,0 +1,26 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: cilium-vxlan
spec:
nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# node -> vxlan
- fromEntities:
- remote-node
toPorts:
- ports:
- port: '8472'
protocol: UDP
egress:
# node -> vxlan
- toEntities:
- remote-node
toPorts:
- ports:
- port: '8472'
protocol: UDP

View file

@ -0,0 +1,65 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: core-dns
namespace: kube-system
specs:
- nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# core dns -> api server
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '6443'
protocol: TCP
- nodeSelector:
# apply to all nodes
matchLabels: {}
egress:
# kubelet -> core dns probes
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '8080'
protocol: TCP
- port: '8181'
protocol: TCP
- endpointSelector:
# apply to core dns pods
matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
ingress:
# kubelet -> core dns probes
- fromEntities:
- host
toPorts:
- ports:
- port: '8080'
protocol: TCP
- port: '8181'
protocol: TCP
egress:
# core dns -> api server
- toEntities:
- kube-apiserver
toPorts:
- ports:
- port: '6443'
protocol: TCP
# core dns -> upstream DNS
- toCIDR:
- 185.12.64.1/32
- 185.12.64.2/32
toPorts:
- ports:
- port: '53'
protocol: UDP

View file

@ -0,0 +1,27 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: etcd
spec:
nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# etcd peer -> etcd peer
- fromEntities:
- remote-node
toPorts:
- ports:
- port: '2380'
protocol: TCP
egress:
# etcd peer -> etcd peer
- toEntities:
- remote-node
toPorts:
- ports:
- port: '2380'
protocol: TCP

View file

@ -0,0 +1,15 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: "cilium.io/v2"
kind: CiliumClusterwideNetworkPolicy
metadata:
name: allow-specific-traffic
spec:
endpointSelector: {}
ingress:
- fromEntities:
- host
toPorts:
- ports:
- port: '6443'
protocol: TCP

View file

@ -0,0 +1,50 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: hubble-relay
namespace: kube-system
specs:
- nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# hubble relay -> hubble agent
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4244'
protocol: TCP
egress:
# kubelet -> hubble relay probes
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4245'
protocol: TCP
- endpointSelector:
# apply to hubble relay pods
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
ingress:
# kubelet -> hubble relay probes
- fromEntities:
- host
toPorts:
- ports:
- port: '4245'
protocol: TCP
egress:
# hubble relay -> hubble agent
- toEntities:
- host
- remote-node
toPorts:
- ports:
- port: '4244'
protocol: TCP

View file

@ -0,0 +1,75 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: hubble-ui
namespace: kube-system
specs:
- nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: ''
ingress:
# hubble ui -> api server
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
toPorts:
- ports:
- port: '6443'
protocol: TCP
- endpointSelector:
# apply to core dns endpoints
matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
ingress:
# hubble ui -> core dns
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
toPorts:
- ports:
- port: '53'
protocol: UDP
- endpointSelector:
# apply to hubble relay endpoints
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
ingress:
# hubble ui -> hubble relay
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
toPorts:
- ports:
- port: '4245'
protocol: TCP
- endpointSelector:
# apply to hubble ui endpoints
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-ui
egress:
# hubble ui -> api server
- toEntities:
- kube-apiserver
toPorts:
- ports:
- port: '6443'
protocol: TCP
# hubble ui -> hubble relay
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4245'
protocol: TCP
# hubble ui -> core dns
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '53'
protocol: UDP

View file

@ -0,0 +1,28 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: kubelet
spec:
nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# api server -> kubelet
- fromEntities:
- kube-apiserver
toPorts:
- ports:
- port: '10250'
protocol: TCP
egress:
# kubelet -> load balancer
- toCIDR:
- 167.235.217.82/32
toEntities:
- host
toPorts:
- ports:
- port: '6443'
protocol: TCP

View file

@ -0,0 +1,16 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
resources:
- ./allow-ssh.yaml
- ./apiserver.yaml
- ./cilium-health.yaml
- ./cilium-vxlan.yaml
- ./core-dns.yaml
- ./etcd.yaml
- ./hubble-relay.yaml
- ./hubble-ui.yaml
- ./kubelet.yaml

View file

@ -0,0 +1,17 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cilium
namespace: flux-system
spec:
interval: 30m
retryInterval: 1m
timeout: 5m
path: "./kubernetes/apps/kube-system/cilium/app"
prune: true
sourceRef:
kind: GitRepository
name: homelab
wait: false

View file

@ -9,7 +9,7 @@ spec:
chart:
spec:
chart: spegel
version: v0.0.27
version: v0.0.23
sourceRef:
kind: HelmRepository
name: spegel-org

View file

@ -7,6 +7,6 @@ resources:
configMapGenerator:
- name: spegel-helm-values
files:
- values.yaml=./helm-values.yml
- values.yaml=./resources/values.yml
configurations:
- kustomizeconfig.yaml

View file

@ -0,0 +1,17 @@
---
spegel:
containerdSock: /run/containerd/containerd.sock
containerdRegistryConfigPath: /etc/cri/conf.d/hosts
registries:
- https://docker.io
- https://ghcr.io
- https://quay.io
- https://mcr.microsoft.com
- https://public.ecr.aws
- https://gcr.io
- https://registry.k8s.io
- https://k8s.gcr.io
- https://lscr.io
service:
registry:
hostPort: 29999

View file

@ -14,7 +14,8 @@ spec:
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m

View file

@ -0,0 +1,109 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app zfs-scrub
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.2.1
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
kubanetics:
type: cronjob
cronjob:
schedule: "@weekly"
parallelism: 1 # set to the total number of nodes so one scrub pod runs per node
containers:
app:
image:
repository: ghcr.io/aarnaud/talos-debug-tools
tag: latest-6.6.29
command: ["/bin/bash", "-c"]
args:
- |
# Scrub the ZFS pool on the host
chroot /host /usr/local/sbin/zpool scrub nahar
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
resources:
requests:
cpu: 25m
limits:
memory: 128Mi
securityContext:
privileged: true
pod:
hostNetwork: true
hostPID: true
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: *app
persistence:
netfs:
type: hostPath
hostPath: /sys
hostPathType: Directory
globalMounts:
- path: /sys
readOnly: true
dev:
type: hostPath
hostPath: /dev
hostPathType: Directory
globalMounts:
- path: /dev
modules:
type: hostPath
hostPath: /lib/modules
hostPathType: ""
globalMounts:
- path: /lib/modules
udev:
type: hostPath
hostPath: /run/udev
hostPathType: ""
globalMounts:
- path: /run/udev
localtime:
type: hostPath
hostPath: /etc/localtime
hostPathType: ""
globalMounts:
- path: /etc/localtime
host:
type: hostPath
hostPath: /
hostPathType: Directory
globalMounts:
- path: /host
efivars:
type: hostPath
hostPath: /sys/firmware/efi/efivars
hostPathType: ""
globalMounts:
- path: /sys/firmware/efi/efivars

View file

@ -5,8 +5,8 @@ kind: Kustomization
resources:
- ./helmrelease.yaml
configMapGenerator:
- name: generic-device-plugin-configmap
- name: zfs-scrub-configmap
files:
- ./resources/config.yml
- zfs-scrub.sh=./resources/zfs-scrub.sh
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,20 @@
#!/usr/bin/env bash
KUBELET_BIN="/usr/local/bin/kubelet"
KUBELET_PID="$(pgrep -f "${KUBELET_BIN}")"
ZPOOL="nahar"
if [ -z "${KUBELET_PID}" ]; then
echo "kubelet not found"
exit 1
fi
# Enter namespaces and run commands
nsrun() {
nsenter \
--mount="/host/proc/${KUBELET_PID}/ns/mnt" \
--net="/host/proc/${KUBELET_PID}/ns/net" \
-- bash -c "$1"
}
# Scrub filesystems
nsrun "zpool scrub ${ZPOOL}"

View file
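Because the script enters the kubelet's mount and network namespaces via nsenter, zpool resolves against the host rather than the container. A hedged sketch of a one-off manual run (the pod name and script path are hypothetical; it assumes a privileged pod with hostPID and the host root mounted at /host, as the HelmRelease above configures):

kubectl -n kube-system exec -it zfs-scrub-debug -- bash /scripts/zfs-scrub.sh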

@ -3,18 +3,19 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app chronyd
name: &app zfs-scrub
namespace: flux-system
spec:
targetNamespace: kube-system
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/kube-system/chronyd/app
path: ./kubernetes/apps/kube-system/zfs-scrub/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m

View file

@ -0,0 +1,16 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
name: immich-app-config
labels:
app.kubernetes.io/name: immich
data:
LOG_LEVEL: verbose
DB_VECTOR_EXTENSION: pgvector
NODE_ENV: production
REDIS_HOSTNAME: dragonfly.database.svc.cluster.local
REDIS_PORT: "6379"
IMMICH_WEB_URL: http://immich-web.media.svc.cluster.local:3000
IMMICH_SERVER_URL: http://immich-server.media.svc.cluster.local:3001
IMMICH_MACHINE_LEARNING_URL: http://immich-machine-learning.media.svc.cluster.local:3003

View file

@ -3,19 +3,17 @@
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: linkwarden
name: immich
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: linkwarden-secret
name: immich-secret
template:
engineVersion: v2
data:
NEXTAUTH_SECRET: "{{ .LINKWARDEN_NEXTAUTH_SECRET }}"
DATABASE_URL: |-
postgres://{{ .LINKWARDEN_POSTGRES_USER }}:{{ .LINKWARDEN_POSTGRES_PASSWORD }}@postgres-primary-real.database.svc/linkwarden
DATABASE_URI: "postgresql://{{ .DATABASE_USER }}:{{ .DATABASE_PASSWORD }}@immich-primary-real.media.svc:{{ .DATABASE_PORT }}/{{ .DATABASE_NAME }}"
dataFrom:
- extract:
key: linkwarden
key: immich

View file

@ -2,15 +2,15 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: mariadb-gatus-ep
name: immich-postgres-gatus-ep
labels:
gatus.io/enabled: "true"
data:
config.yaml: |
endpoints:
- name: mariadb
- name: immich-postgres
group: infrastructure
url: tcp://mariadb.database.svc.cluster.local:3306
url: tcp://immich-primary-real.media.svc.cluster.local:5432
interval: 1m
ui:
hide-url: true

View file

@ -0,0 +1,97 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: &name immich
namespace: default
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
immich-server:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
containers:
app:
image:
repository: ghcr.io/immich-app/immich-server
tag: v1.105.1
command: /bin/sh
args:
- ./start-server.sh
probes:
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
liveness:
enabled: true
readiness:
enabled: true
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
memory: 4Gi
env:
TZ: America/Chicago
DB_URL:
valueFrom:
secretKeyRef:
name: immich-secret
key: DATABASE_URI
envFrom:
- configMapRef:
name: immich-app-config
service:
app:
controller: immich-server
ports:
http:
port: 3001
ingress:
app:
enabled: true
className: external-nginx
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
external-dns.alpha.kubernetes.io/target: external.hsn.dev
nginx.ingress.kubernetes.io/proxy-body-size: "0"
hosts:
- host: &host "im.hsn.dev"
paths:
- path: /
service:
identifier: app
port: http
tls:
- hosts:
- *host
persistence:
media:
enabled: true
type: nfs
server: 10.1.1.13
path: /eru/media/immich
globalMounts:
- path: /usr/src/app/upload

View file

@ -0,0 +1,27 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./configmap.yaml
- ./externalsecret.yaml
- ./gatus.yaml
- ./helmrelease.yaml
- ./machine-learning
- ./microservices
- ./postgresCluster.yaml
- ./pushsecret.yaml
- ./service.yaml
configMapGenerator:
- name: immich-database-init-sql
files:
- init.sql=./resources/init.sql
labels:
- pairs:
app.kubernetes.io/name: immich
app.kubernetes.io/instance: immich
app.kubernetes.io/part-of: immich
generatorOptions:
disableNameSuffixHash: true
annotations:
kustomize.toolkit.fluxcd.io/substitute: disabled

View file

@ -0,0 +1,82 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: immich-machine-learning
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
interval: 15m
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
immich-machine-learning:
annotations:
reloader.stakater.com/auto: "true"
strategy: Recreate
pod:
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
containers:
app:
image:
repository: ghcr.io/immich-app/immich-machine-learning
tag: v1.105.1
resources:
requests:
cpu: 15m
memory: 250Mi
limits:
memory: 4000Mi
probes:
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
liveness:
enabled: true
readiness:
enabled: true
envFrom:
- configMapRef:
name: immich-app-config
env:
DB_URL:
valueFrom:
secretKeyRef:
name: immich-secret
key: DATABASE_URI
service:
app:
controller: immich-machine-learning
ports:
http:
port: 3003
persistence:
media:
enabled: true
type: nfs
server: 10.1.1.13
path: /eru/media/immich
globalMounts:
- path: /usr/src/app/upload
cache:
enabled: true
type: emptyDir

View file

@ -2,9 +2,10 @@
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: default
labels:
- pairs:
app.kubernetes.io/name: immich-machine-learning
app.kubernetes.io/instance: immich-machine-learning
app.kubernetes.io/part-of: immich
resources:
- ./helmrelease.yaml
- ./externalsecret.yaml
- ../../../../templates/gatus/internal
- ../../../../templates/volsync

View file

@ -0,0 +1,80 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: immich-microservices
spec:
interval: 15m
chart:
spec:
chart: app-template
version: 3.1.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
interval: 15m
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
strategy: rollback
values:
controllers:
immich-microservices:
strategy: Recreate
annotations:
reloader.stakater.com/auto: "true"
pod:
nodeSelector:
nvidia.com/gpu.present: "true"
runtimeClassName: nvidia
containers:
app:
image:
repository: ghcr.io/immich-app/immich-server
tag: v1.105.1
command: /bin/sh
args:
- ./start-microservices.sh
resources:
requests:
cpu: 100m
memory: 250Mi
limits:
memory: 4000Mi
probes:
startup:
enabled: true
spec:
failureThreshold: 30
periodSeconds: 5
liveness:
enabled: true
readiness:
enabled: true
envFrom:
- configMapRef:
name: immich-app-config
env:
DB_URL:
valueFrom:
secretKeyRef:
name: immich-secret
key: DATABASE_URI
service:
app:
controller: immich-microservices
enabled: false
persistence:
media:
enabled: true
type: nfs
server: 10.1.1.13
path: /eru/media/immich
globalMounts:
- path: /usr/src/app/upload

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
labels:
- pairs:
app.kubernetes.io/name: immich-microservices
app.kubernetes.io/instance: immich-microservices
app.kubernetes.io/part-of: immich
resources:
- ./helmrelease.yaml

View file

@ -0,0 +1,94 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/postgres-operator.crunchydata.com/postgrescluster_v1beta1.json
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PostgresCluster
metadata:
name: &name "${APP}"
spec:
postgresVersion: 16
dataSource:
pgbackrest:
stanza: db
configuration:
- secret:
name: pgo-s3-creds
global:
repo1-path: "/${APP}/repo1"
repo1-s3-uri-style: path
repo:
name: repo1
s3:
bucket: "crunchy-postgres"
endpoint: "s3.hsn.dev"
region: "us-east-1"
monitoring:
pgmonitor:
exporter:
# https://github.com/CrunchyData/postgres-operator-examples/blob/main/helm/install/values.yaml
image: registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi8-0.15.0-3
patroni:
dynamicConfiguration:
synchronous_mode: true
postgresql:
synchronous_commit: "on"
pg_hba:
- hostnossl all all 10.244.0.0/16 md5
- hostssl all all all md5
databaseInitSQL:
name: immich-database-init-sql
key: init.sql
instances:
- name: postgres
metadata:
labels:
app.kubernetes.io/name: pgo-${APP}
replicas: 1
dataVolumeClaimSpec:
storageClassName: openebs-zfs
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
topologySpreadConstraints:
- maxSkew: 1
topologyKey: "kubernetes.io/hostname"
whenUnsatisfiable: "DoNotSchedule"
labelSelector:
matchLabels:
postgres-operator.crunchydata.com/cluster: ${APP}
postgres-operator.crunchydata.com/data: postgres
users:
- name: "immich"
databases:
- "immich"
options: "SUPERUSER"
password:
type: AlphaNumeric
backups:
pgbackrest:
configuration:
- secret:
name: pgo-s3-creds
global:
archive-push-queue-max: 4GiB
repo1-retention-full: "14"
repo1-retention-full-type: time
repo1-path: "/${APP}/repo1"
repo1-s3-uri-style: path
manual:
repoName: repo1
options:
- --type=full
metadata:
labels:
app.kubernetes.io/name: pgo-${APP}-backup
repos:
- name: repo1
schedules:
full: "0 1 * * 0"
differential: "0 1 * * 1-6"
s3:
bucket: "crunchy-postgres"
endpoint: "s3.hsn.dev"
region: "us-east-1"

View file
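The manual stanza above wires repo1 for on-demand full backups, while the schedules handle weekly fulls and daily differentials. A manual backup can be kicked off by annotating the cluster with the Crunchy operator's documented trigger annotation — a sketch assuming the substituted ${APP} is immich in the media namespace, as the ks.yaml later in this diff sets:

kubectl -n media annotate postgrescluster immich \
  postgres-operator.crunchydata.com/pgbackrest-backup="$(date --iso-8601=seconds)" --overwrite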

@ -0,0 +1,40 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/pushsecret_v1alpha1.json
apiVersion: external-secrets.io/v1alpha1
kind: PushSecret
metadata:
name: immich
spec:
refreshInterval: 1h
secretStoreRefs:
- name: onepassword-connect
kind: ClusterSecretStore
selector:
secret:
name: immich-pguser-immich
data:
- match:
secretKey: dbname
remoteRef:
remoteKey: immich
property: DATABASE_NAME
- match:
secretKey: host
remoteRef:
remoteKey: immich
property: DATABASE_HOST
- match:
secretKey: user
remoteRef:
remoteKey: immich
property: DATABASE_USER
- match:
secretKey: password
remoteRef:
remoteKey: immich
property: DATABASE_PASSWORD
- match:
secretKey: port
remoteRef:
remoteKey: immich
property: DATABASE_PORT

View file

@ -0,0 +1,4 @@
\c immich
CREATE EXTENSION vector;
CREATE EXTENSION cube;
CREATE EXTENSION earthdistance;

View file

@ -0,0 +1,20 @@
---
apiVersion: v1
kind: Service
metadata:
labels:
postgres-operator.crunchydata.com/cluster: immich
postgres-operator.crunchydata.com/role: primary
name: immich-primary-real
namespace: media
spec:
internalTrafficPolicy: Cluster
ports:
- name: postgres
port: 5432
protocol: TCP
targetPort: postgres
selector:
postgres-operator.crunchydata.com/cluster: immich
postgres-operator.crunchydata.com/role: master
type: ClusterIP

View file

@ -3,27 +3,28 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app sonarr-anime
name: &app immich
namespace: flux-system
spec:
targetNamespace: anime
targetNamespace: media
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: crunchy-postgres-operator
- name: external-secrets-stores
- name: volsync
- name: rook-ceph-cluster
path: ./kubernetes/apps/anime/sonarr/app
- name: dragonfly
path: ./kubernetes/apps/media/immich/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 5Gi
DB_NAME: immich
DB_USER: immich

View file

@ -2,7 +2,8 @@
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: security
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml
# Pre Flux-Kustomizations
- ./namespace.yaml
# Flux-Kustomizations
- ./immich/ks.yaml

View file

@ -2,7 +2,7 @@
apiVersion: v1
kind: Namespace
metadata:
name: anime
name: media
labels:
kustomize.toolkit.fluxcd.io/prune: disabled
volsync.backube/privileged-movers: "true"

View file

@ -1,5 +1,5 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
@ -9,7 +9,7 @@ spec:
chart:
spec:
chart: app-template
version: 3.5.1
version: 3.3.0
sourceRef:
kind: HelmRepository
name: bjw-s
@ -23,7 +23,7 @@ spec:
retries: 3
strategy: rollback
dependsOn:
- name: alertmanager
- name: kube-prometheus-stack
namespace: observability
values:
controllers:
@ -35,12 +35,13 @@ spec:
app:
image:
repository: ghcr.io/onedr0p/kubanetics
tag: 2024.11.1
tag: 2024.7.1@sha256:020ec6f00b9cdc0ee247d2fd34d3951ac32718326bb90c38e947eed9d555de6c
env:
SCRIPT_NAME: alertmanager-silencer.sh
ALERTMANAGER_URL: http://alertmanager.observability.svc.cluster.local:9093
MATCHERS_0: alertname=CephPGImbalance job=rook-ceph-exporter
MATCHERS_1: alertname=CephMonClockSkew job=rook-ceph-mgr
ALERTMANAGER_URL: http://alertmanager-operated.observability.svc.cluster.local:9093
MATCHERS_0: alertname=NodeCPUHighUsage job=node-exporter
MATCHERS_1: alertname=CPUThrottlingHigh container=gc
MATCHERS_2: alertname=CPUThrottlingHigh container=worker
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true

View file

@ -3,18 +3,19 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app node-exporter
name: &app alertmanager-silencer
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/observability/node-exporter/app
path: ./kubernetes/apps/observability/alertmanager-silencer/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m

View file

@ -0,0 +1,61 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: grafana-secret
namespace: observability
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: grafana-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "{{ .authentik_grafana_oauth_client_secret }}"
GF_DATE_FORMATS_USE_BROWSER_LOCALE: "true"
GF_SERVER_ROOT_URL: https://grafana.hsn.dev
GF_DATABASE_NAME: "{{ .grafana_GF_DATABASE_NAME }}"
GF_DATABASE_HOST: "postgres-primary-real.database.svc"
GF_DATABASE_USER: "{{ .grafana_GF_DATABASE_USER }}"
GF_DATABASE_PASSWORD: "{{ .grafana_GF_DATABASE_PASSWORD }}"
GF_DATABASE_SSL_MODE: "require"
GF_DATABASE_TYPE: postgres
GF_ANALYTICS_CHECK_FOR_UPDATES: "false"
GF_ANALYTICS_CHECK_FOR_PLUGIN_UPDATES: "false"
GF_ANALYTICS_REPORTING_ENABLED: "false"
GF_AUTH_ANONYMOUS_ENABLED: "false"
GF_AUTH_BASIC_ENABLED: "false"
GF_AUTH_GENERIC_OAUTH_ENABLED: "true"
GF_AUTH_GENERIC_OAUTH_API_URL: https://auth.hsn.dev/application/o/userinfo/
GF_AUTH_GENERIC_OAUTH_AUTH_URL: https://auth.hsn.dev/application/o/authorize/
GF_AUTH_GENERIC_OAUTH_TOKEN_URL: https://auth.hsn.dev/application/o/token/
GF_AUTH_GENERIC_OAUTH_CLIENT_ID: CoV7ae1HxuNzwCbVPf3U7TfYMX2rVqC5T9RAUo5M
GF_AUTH_GENERIC_OAUTH_EMPTY_SCOPES: "false"
GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups[*], 'Grafana Admins') && 'Admin' || contains(groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'"
GF_AUTH_GENERIC_OAUTH_SCOPES: openid profile email groups
GF_AUTH_OAUTH_AUTO_LOGIN: "true"
GF_EXPLORE_ENABLED: "true"
GF_FEATURE_TOGGLES_ENABLE: publicDashboards
GF_LOG_MODE: console
GF_NEWS_NEWS_FEED_ENABLED: "false"
GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS: natel-discrete-panel,pr0ps-trackmap-panel,panodata-map-panel
GF_SECURITY_COOKIE_SAMESITE: grafana
GF_SECURITY_ANGULAR_SUPPORT_ENABLED: "true"
dataFrom:
- extract:
key: Authentik
rewrite:
- regexp:
source: "(.*)"
target: "authentik_$1"
- extract:
key: grafana
rewrite:
- regexp:
source: "(.*)"
target: "grafana_$1"

View file
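The two rewrite blocks prefix every key extracted from the Authentik and grafana 1Password items, which is why the template references read .authentik_grafana_oauth_client_secret and .grafana_GF_DATABASE_USER rather than the raw item keys. For illustration (the item keys shown are inferred from the template above):

# Authentik item key "grafana_oauth_client_secret" -> template key ".authentik_grafana_oauth_client_secret"
# grafana item key "GF_DATABASE_USER"              -> template key ".grafana_GF_DATABASE_USER"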

@ -0,0 +1,401 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: grafana
spec:
interval: 30m
chart:
spec:
chart: grafana
version: 8.3.7
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
dependsOn:
- name: kube-prometheus-stack
namespace: observability
- name: loki
namespace: observability
values:
replicas: 1
envFromSecret: grafana-secret
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default-folder
- name: ceph
orgId: 1
folder: Ceph
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/ceph-folder
- name: crunchy-postgres
orgId: 1
folder: Crunchy-postgres
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/crunchy-postgres-folder
- name: flux
orgId: 1
folder: Flux
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/flux-folder
- name: kubernetes
orgId: 1
folder: Kubernetes
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/kubernetes-folder
- name: nginx
orgId: 1
folder: Nginx
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/nginx-folder
- name: prometheus
orgId: 1
folder: Prometheus
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/prometheus-folder
- name: thanos
orgId: 1
folder: Thanos
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/thanos-folder
- name: unifi
orgId: 1
folder: Unifi
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/unifi-folder
datasources:
datasources.yaml:
apiVersion: 1
deleteDatasources:
- { name: Alertmanager, orgId: 1 }
- { name: Loki, orgId: 1 }
- { name: Prometheus, orgId: 1 }
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
url: http://thanos-query-frontend.observability.svc.cluster.local:10902
jsonData:
prometheusType: Thanos
timeInterval: 1m
isDefault: true
- name: Loki
type: loki
uid: loki
access: proxy
url: http://loki-gateway.observability.svc.cluster.local
jsonData:
maxLines: 250
- name: Alertmanager
type: alertmanager
uid: alertmanager
access: proxy
url: http://alertmanager-operated.observability.svc.cluster.local:9093
jsonData:
implementation: prometheus
dashboards:
default:
cloudflared:
# renovate: depName="Cloudflare Tunnels (cloudflared)"
gnetId: 17457
revision: 6
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
external-dns:
# renovate: depName="External-dns"
gnetId: 15038
revision: 3
datasource: Prometheus
minio:
# renovate: depName="MinIO Dashboard"
gnetId: 13502
revision: 25
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
node-exporter-full:
# renovate: depName="Node Exporter Full"
gnetId: 1860
revision: 33
datasource: Prometheus
postgres:
# renovate: depName="PostgreSQL Database"
gnetId: 9628
revision: 7
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
smartctl-exporter:
# renovate: depName="smartctl_exporter"
gnetId: 20204
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
spegel:
# renovate: depName="Spegel"
gnetId: 18089
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
unpackerr:
# renovate: depName="Unpackerr"
gnetId: 18817
revision: 1
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
zfs:
# renovate: depName="ZFS"
gnetId: 7845
revision: 4
datasource: Prometheus
dragonflydb:
url: https://raw.githubusercontent.com/dragonflydb/dragonfly/main/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Prometheus
external-secrets:
url: https://raw.githubusercontent.com/external-secrets/external-secrets/main/docs/snippets/dashboard.json
datasource: Prometheus
node-feature-discovery:
url: https://raw.githubusercontent.com/kubernetes-sigs/node-feature-discovery/master/examples/grafana-dashboard.json
datasource: Prometheus
crunchy-postgres:
pgbackrest:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/pgbackrest.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
pods:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/pod_details.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/postgresql_details.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql-overview:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/postgresql_overview.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql-health:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/postgresql_service_health.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
postgresql-alerts:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/prometheus_alerts.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
query-stats:
url: https://raw.githubusercontent.com/CrunchyData/pgmonitor/development/grafana/containers/query_statistics.json
datasource:
- { name: DS_PROMETHEUS, value: Prometheus }
ceph:
ceph-cluster:
# renovate: depName="Ceph Cluster"
gnetId: 2842
revision: 17
datasource: Prometheus
ceph-osd:
# renovate: depName="Ceph - OSD (Single)"
gnetId: 5336
revision: 9
datasource: Prometheus
ceph-pools:
# renovate: depName="Ceph - Pools"
gnetId: 5342
revision: 9
datasource: Prometheus
flux:
flux-cluster:
url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/cluster.json
datasource: Prometheus
flux-control-plane:
url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/control-plane.json
datasource: Prometheus
kubernetes:
kubernetes-api-server:
# renovate: depName="Kubernetes / System / API Server"
gnetId: 15761
revision: 16
datasource: Prometheus
kubernetes-coredns:
# renovate: depName="Kubernetes / System / CoreDNS"
gnetId: 15762
revision: 17
datasource: Prometheus
kubernetes-global:
# renovate: depName="Kubernetes / Views / Global"
gnetId: 15757
revision: 37
datasource: Prometheus
kubernetes-namespaces:
# renovate: depName="Kubernetes / Views / Namespaces"
gnetId: 15758
revision: 34
datasource: Prometheus
kubernetes-nodes:
# renovate: depName="Kubernetes / Views / Nodes"
gnetId: 15759
revision: 29
datasource: Prometheus
kubernetes-pods:
# renovate: depName="Kubernetes / Views / Pods"
gnetId: 15760
revision: 21
datasource: Prometheus
kubernetes-volumes:
# renovate: depName="K8s / Storage / Volumes / Cluster"
gnetId: 11454
revision: 14
datasource: Prometheus
nginx:
nginx:
url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/nginx.json
datasource: Prometheus
nginx-request-handling-performance:
url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/request-handling-performance.json
datasource: Prometheus
prometheus:
prometheus:
# renovate: depName="Prometheus"
gnetId: 19105
revision: 3
datasource: Prometheus
thanos:
thanos-bucket-replicate:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/bucket-replicate.json
datasource: Prometheus
thanos-compact:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/compact.json
datasource: Prometheus
thanos-overview:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/overview.json
datasource: Prometheus
thanos-query:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/query.json
datasource: Prometheus
thanos-query-frontend:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/query-frontend.json
datasource: Prometheus
thanos-receive:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/receive.json
datasource: Prometheus
thanos-rule:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/rule.json
datasource: Prometheus
thanos-sidecar:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/sidecar.json
datasource: Prometheus
thanos-store:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/store.json
datasource: Prometheus
unifi:
unifi-insights:
# renovate: depName="UniFi-Poller: Client Insights - Prometheus"
gnetId: 11315
revision: 9
datasource: Prometheus
unifi-network-sites:
# renovate: depName="UniFi-Poller: Network Sites - Prometheus"
gnetId: 11311
revision: 5
datasource: Prometheus
unifi-uap:
# renovate: depName="UniFi-Poller: UAP Insights - Prometheus"
gnetId: 11314
revision: 10
datasource: Prometheus
unifi-usw:
# renovate: depName="UniFi-Poller: USW Insights - Prometheus"
gnetId: 11312
revision: 9
datasource: Prometheus
sidecar:
dashboards:
enabled: true
searchNamespace: ALL
labelValue: ""
label: grafana_dashboard
folderAnnotation: grafana_folder
provider:
disableDelete: true
foldersFromFilesStructure: true
datasources:
enabled: true
searchNamespace: ALL
labelValue: ""
plugins:
- grafana-clock-panel
- grafana-piechart-panel
- grafana-worldmap-panel
- natel-discrete-panel
- pr0ps-trackmap-panel
- vonage-status-panel
serviceMonitor:
enabled: true
ingress:
enabled: true
ingressClassName: external-nginx
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
external-dns.alpha.kubernetes.io/target: external.hsn.dev
hosts:
- &host grafana.hsn.dev
tls:
- hosts:
- *host
persistence:
enabled: false
testFramework:
enabled: false
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: grafana

View file

@ -3,27 +3,27 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app radarr-anime
name: &app grafana
namespace: flux-system
spec:
targetNamespace: anime
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: crunchy-postgres-operator
- name: external-secrets-stores
- name: rook-ceph-cluster
- name: volsync
path: ./kubernetes/apps/anime/radarr/app
path: ./kubernetes/apps/observability/grafana/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 5Gi
DB_NAME: grafana
DB_USER: grafana

View file

@ -3,17 +3,20 @@
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: kometa-image-maid
name: alertmanager
spec:
refreshInterval: 5m
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: kometa-image-maid-secret
creationPolicy: Owner
data:
- secretKey: PLEX_TOKEN
remoteRef:
key: Plex
property: token
name: alertmanager-secret
template:
templateFrom:
- configMap:
name: alertmanager-config-tpl
items:
- key: alertmanager.yaml
dataFrom:
- extract:
key: pushover

View file

@ -0,0 +1,190 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: kube-prometheus-stack
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: kube-prometheus-stack
version: 61.6.0
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: flux-system
install:
crds: CreateReplace
remediation:
retries: 3
upgrade:
cleanupOnFail: true
crds: CreateReplace
remediation:
strategy: rollback
retries: 3
values:
crds:
enabled: true
cleanPrometheusOperatorObjectNames: true
alertmanager:
ingress:
enabled: true
pathType: Prefix
ingressClassName: internal-nginx
hosts:
- &host alertmanager.jahanson.tech
tls:
- hosts:
- *host
alertmanagerSpec:
replicas: 1
useExistingSecret: true
configSecret: alertmanager-secret
storage:
volumeClaimTemplate:
spec:
storageClassName: openebs-hostpath
resources:
requests:
storage: 1Gi
kubelet:
enabled: true
serviceMonitor:
metricRelabelings:
# Drop high cardinality labels
- action: labeldrop
regex: (uid)
- action: labeldrop
regex: (id|name)
- action: drop
sourceLabels: ["__name__"]
regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
kubeApiServer:
enabled: true
serviceMonitor:
metricRelabelings:
# Drop high cardinality labels
- action: drop
sourceLabels: ["__name__"]
regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
- action: drop
sourceLabels: ["__name__"]
regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
kubeControllerManager:
enabled: true
endpoints: &cp
- 10.1.1.61
kubeEtcd:
enabled: true
endpoints: *cp
kubeScheduler:
enabled: true
endpoints: *cp
kubeProxy:
enabled: false
prometheus:
ingress:
enabled: true
ingressClassName: internal-nginx
pathType: Prefix
hosts:
- &host prometheus.jahanson.tech
tls:
- hosts:
- *host
thanosService:
enabled: true
thanosServiceMonitor:
enabled: true
# thanosServiceExternal:
# enabled: true
# type: LoadBalancer
# annotations:
# external-dns.alpha.kubernetes.io/hostname: thanos.jahanson.tech
# io.cilium/lb-ipam-ips: 10.45.0.6
# externalTrafficPolicy: Cluster
prometheusSpec:
podMetadata:
annotations:
secret.reloader.stakater.com/reload: &secret thanos-objstore-config
replicas: 1
replicaExternalLabelName: __replica__
scrapeInterval: 1m # Must match interval in Grafana Helm chart
ruleSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
probeSelectorNilUsesHelmValues: false
scrapeConfigSelectorNilUsesHelmValues: false
enableAdminAPI: true
walCompression: true
enableFeatures:
- auto-gomemlimit
- memory-snapshot-on-shutdown
- new-service-discovery-manager
image:
registry: quay.io
repository: prometheus/prometheus
tag: v2.51.0-dedupelabels
thanos:
image: quay.io/thanos/thanos:${THANOS_VERSION}
version: "${THANOS_VERSION#v}"
objectStorageConfig:
existingSecret:
name: *secret
key: config
retention: 2d
retentionSize: 15GB
externalLabels:
cluster: main
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: openebs-hostpath
resources:
requests:
storage: 20Gi
nodeExporter:
enabled: true
prometheus-node-exporter:
fullnameOverride: node-exporter
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
kubeStateMetrics:
enabled: true
kube-state-metrics:
fullnameOverride: kube-state-metrics
metricLabelsAllowlist:
- pods=[*]
- deployments=[*]
- persistentvolumeclaims=[*]
prometheus:
monitor:
enabled: true
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
grafana:
enabled: false
forceDeployDashboards: true
sidecar:
dashboards:
annotations:
grafana_folder: Kubernetes
multicluster:
etcd:
enabled: true

View file

@ -5,11 +5,12 @@ kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml
- ./prometheusrules
# - ./scrapeconfigs
- ./podmonitors
configMapGenerator:
- name: alertmanager-configmap
- name: alertmanager-config-tpl
files:
- resources/alertmanager.yml
- alertmanager.yaml=./resources/alertmanager.yaml
generatorOptions:
disableNameSuffixHash: true
annotations:
kustomize.toolkit.fluxcd.io/substitute: disabled

View file

@ -0,0 +1,34 @@
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/podmonitor_v1.json
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: crunchy-postgres-exporter
spec:
selector:
matchLabels:
postgres-operator.crunchydata.com/crunchy-postgres-exporter: 'true'
namespaceSelector:
matchNames:
- database
- media
podMetricsEndpoints:
- port: "exporter"
relabelings:
- sourceLabels: [__meta_kubernetes_pod_container_port_number]
action: keep
regex: "9187"
- sourceLabels: [__meta_kubernetes_namespace]
targetLabel: kubernetes_namespace
- sourceLabels: [__meta_kubernetes_pod_name]
targetLabel: pod
- sourceLabels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_postgres_operator_crunchydata_com_cluster]
separator: ":"
targetLabel: pg_cluster
replacement: "$1$2"
- sourceLabels: [__meta_kubernetes_pod_ip]
targetLabel: ip
- sourceLabels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_instance]
targetLabel: deployment
- sourceLabels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_role]
targetLabel: role

View file

@ -3,11 +3,17 @@
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: dragonfly
name: dragonflydb-metrics
namespace: database
spec:
selector:
matchLabels:
app.kubernetes.io/name: dragonfly
app: dragonfly
podTargetLabels: ["app"]
podTargetLabels:
- app
namespaceSelector:
matchNames:
- database
podMetricsEndpoints:
- port: admin

View file

@ -3,5 +3,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./helmrelease.yaml
- ./externalsecret.yaml
- ./crunchy-postgres.yaml
- ./dragonflydb.yaml

View file

@ -3,4 +3,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- helmrelease.yaml
- ./prometheusrule.yaml

View file

@ -0,0 +1,37 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: miscellaneous-rules
labels:
prometheus: k8s
role: alert-rules
spec:
groups:
- name: dockerhub
rules:
- alert: BootstrapRateLimitRisk
annotations:
summary: Kubernetes cluster at risk of being rate limited by dockerhub on bootstrap
expr: count(time() - container_last_seen{image=~"(docker.io).*",container!=""} < 30) > 100
for: 15m
labels:
severity: critical
- name: oom
rules:
- alert: OOMKilled
annotations:
summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
labels:
severity: critical
- name: zfs
rules:
- alert: ZfsUnexpectedPoolState
annotations:
summary: ZFS pool {{$labels.zpool}} on {{$labels.instance}} is in an unexpected state {{$labels.state}}
expr: node_zfs_zpool_state{state!="online"} > 0
for: 15m
labels:
severity: critical

View file

@ -0,0 +1,68 @@
---
global:
resolve_timeout: 5m
route:
group_by: ["alertname", "job"]
group_interval: 10m
group_wait: 1m
receiver: pushover
repeat_interval: 12h
routes:
- receiver: heartbeat
group_interval: 5m
group_wait: 0s
matchers:
- alertname =~ "Watchdog"
repeat_interval: 5m
- receiver: "null"
matchers:
- alertname =~ "InfoInhibitor"
- receiver: pushover
continue: true
matchers:
- severity = "critical"
inhibit_rules:
- equal: ["alertname", "namespace"]
source_matchers:
- severity = "critical"
target_matchers:
- severity = "warning"
receivers:
- name: heartbeat
webhook_configs:
- send_resolved: true
url: "{{ .alertmanager_heartbeat_url }}"
- name: "null"
- name: pushover
pushover_configs:
- html: true
# Compooters are hard
message: |-
{{ "{{-" }} range .Alerts {{ "}}" }}
{{ "{{-" }} if ne .Annotations.description "" {{ "}}" }}
{{ "{{" }} .Annotations.description {{ "}}" }}
{{ "{{-" }} else if ne .Annotations.summary "" {{ "}}" }}
{{ "{{" }} .Annotations.summary {{ "}}" }}
{{ "{{-" }} else if ne .Annotations.message "" {{ "}}" }}
{{ "{{" }} .Annotations.message {{ "}}" }}
{{ "{{-" }} else {{ "}}" }}
Alert description not available
{{ "{{-" }} end {{ "}}" }}
{{ "{{-" }} if gt (len .Labels.SortedPairs) 0 {{ "}}" }}
<small>
{{ "{{-" }} range .Labels.SortedPairs {{ "}}" }}
<b>{{ "{{" }} .Name {{ "}}" }}:</b> {{ "{{" }} .Value {{ "}}" }}
{{ "{{-" }} end {{ "}}" }}
</small>
{{ "{{-" }} end {{ "}}" }}
{{ "{{-" }} end {{ "}}" }}
priority: |-
{{ "{{" }} if eq .Status "firing" {{ "}}" }}1{{ "{{" }} else {{ "}}" }}0{{ "{{" }} end {{ "}}" }}
send_resolved: true
sound: gamelan
title: >-
{{ "{{" }} .CommonLabels.alertname {{ "}}" }}
[{{ "{{" }} .Status | toUpper {{ "}}" }}{{ "{{" }} if eq .Status "firing" {{ "}}" }}:{{ "{{" }} .Alerts.Firing | len {{ "}}" }}{{ "{{" }} end {{ "}}" }}]
token: "{{ .alertmanager_token }}"
url_title: View in Alertmanager
user_key: "{{ .userkey_jahanson }}"

View file
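The escaping in the Pushover receiver exists because this file is templated twice: the ExternalSecret engine renders it first, consuming plain {{ }} pairs for values like .alertmanager_token, and only the literal braces it emits survive for Alertmanager's own Go templating. Each {{ "{{-" }} therefore prints the string {{- verbatim. For example, the first pass turns:

{{ "{{-" }} range .Alerts {{ "}}" }}

into the Alertmanager template text:

{{- range .Alerts }}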

@ -3,5 +3,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- helmrelease.yaml
- rbac.yaml
- ./node-exporter.yaml
- ./zfs-exporter.yaml

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
name: node-exporter
spec:
staticConfigs:
- targets:
- 10.1.1.1:9100
metricsPath: /metrics

View file

@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json
apiVersion: monitoring.coreos.com/v1alpha1
kind: ScrapeConfig
metadata:
name: zfs-exporter
spec:
staticConfigs:
- targets:
- 10.1.1.13:9134
metricsPath: /metrics

View file

@ -3,27 +3,27 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app flood
name: &app kube-prometheus-stack
namespace: flux-system
spec:
targetNamespace: qbittorrent
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
- name: qbittorrent
- name: openebs
- name: volsync
- name: rook-ceph-cluster
path: ./kubernetes/apps/qbittorrent/flood/app
path: ./kubernetes/apps/observability/kube-prometheus-stack/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
timeout: 5m
retryInterval: 1m
timeout: 15m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 2Gi
# renovate: datasource=docker depName=quay.io/thanos/thanos
THANOS_VERSION: v0.34.1

View file
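The postBuild.substitute block above is what makes the Thanos references in the HelmRelease resolve: Flux's variable substitution supports shell-style prefix trimming, so ${THANOS_VERSION} keeps the v prefix for the image tag while ${THANOS_VERSION#v} strips it for the chart's version field. With THANOS_VERSION=v0.34.1 the rendered values become:

image: quay.io/thanos/thanos:v0.34.1
version: "0.34.1"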

@ -3,25 +3,26 @@
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: &name mariadb
name: loki
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: mariadb-secret
name: loki-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
AWS_ACCESS_KEY_ID: "{{ .minio_mariadb_access_key }}"
AWS_SECRET_ACCESS_KEY: "{{ .minio_mariadb_secret_key }}"
S3_HOST: s3.hsn.dev
S3_BUCKET: "{{ .minio_thanos_bucket_name }}"
S3_ACCESS_KEY: "{{ .minio_loki_access_key }}"
S3_SECRET_KEY: "{{ .minio_loki_secret_key }}"
S3_REGION: us-east-1
dataFrom:
- extract:
key: minio
rewrite:
- regexp:
source: "[-]"
target: "_"
- regexp:
source: "(.*)"
target: "minio_$1"

View file

@ -0,0 +1,138 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: loki
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: loki
version: 6.7.3
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: uninstall
retries: 3
valuesFrom:
- targetPath: loki.storage.bucketNames.chunks
kind: Secret
name: loki-secret
valuesKey: S3_BUCKET
- targetPath: loki.storage.s3.endpoint
kind: Secret
name: loki-secret
valuesKey: S3_HOST
- targetPath: loki.storage.s3.region
kind: Secret
name: loki-secret
valuesKey: S3_REGION
- targetPath: loki.storage.s3.accessKeyId
kind: Secret
name: loki-secret
valuesKey: S3_ACCESS_KEY
- targetPath: loki.storage.s3.secretAccessKey
kind: Secret
name: loki-secret
valuesKey: S3_SECRET_KEY
values:
deploymentMode: SimpleScalable
loki:
podAnnotations:
secret.reloader.stakater.com/reload: loki-secret
ingester:
chunk_encoding: snappy
storage:
type: s3
s3:
s3ForcePathStyle: true
insecure: true
schemaConfig:
configs:
- from: "2024-04-01"
store: tsdb
object_store: s3
schema: v13
index:
prefix: loki_index_
period: 24h
structuredConfig:
auth_enabled: false
server:
log_level: info
http_listen_port: 3100
grpc_listen_port: 9095
grpc_server_max_recv_msg_size: 8388608
grpc_server_max_send_msg_size: 8388608
limits_config:
ingestion_burst_size_mb: 128
ingestion_rate_mb: 64
max_query_parallelism: 100
per_stream_rate_limit: 64M
per_stream_rate_limit_burst: 128M
reject_old_samples: true
reject_old_samples_max_age: 168h
retention_period: 30d
shard_streams:
enabled: true
split_queries_by_interval: 1h
query_scheduler:
max_outstanding_requests_per_tenant: 4096
frontend:
max_outstanding_per_tenant: 4096
ruler:
enable_api: true
enable_alertmanager_v2: true
alertmanager_url: http://alertmanager-operated.observability.svc.cluster.local:9093
storage:
type: local
local:
directory: /rules
rule_path: /rules/fake
analytics:
reporting_enabled: false
backend:
replicas: 1
persistence:
size: 20Gi
storageClass: openebs-hostpath
gateway:
replicas: 1
image:
registry: ghcr.io
ingress:
enabled: true
ingressClassName: internal-nginx
hosts:
- host: &host loki.jahanson.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts: [*host]
read:
replicas: 1
write:
replicas: 1
persistence:
size: 20Gi
storageClass: openebs-hostpath
sidecar:
image:
repository: ghcr.io/kiwigrid/k8s-sidecar
rules:
searchNamespace: ALL
folder: /rules/fake
lokiCanary:
enabled: false
test:
enabled: false

View file
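The valuesFrom entries graft individual keys of loki-secret into the chart values at their targetPath, so the S3 credentials never appear in Git. After merging, the effective storage block looks roughly like this (angle-bracket placeholders stand in for the secret values; endpoint and region come from the ExternalSecret shown earlier):

loki:
  storage:
    bucketNames:
      chunks: <S3_BUCKET>
    s3:
      endpoint: s3.hsn.dev
      region: us-east-1
      accessKeyId: <S3_ACCESS_KEY>
      secretAccessKey: <S3_SECRET_KEY>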

@ -3,22 +3,23 @@
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app scrutiny
name: &app loki
namespace: flux-system
spec:
targetNamespace: observability
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/observability/scrutiny/app
dependsOn:
- name: external-secrets-stores
- name: openebs
- name: vector
path: ./kubernetes/apps/observability/loki/app
prune: true
sourceRef:
kind: GitRepository
name: theshire
name: homelab
wait: false
interval: 30m
retryInterval: 1m
postBuild:
substitute:
APP: *app
VOLSYNC_CAPACITY: 1Gi
timeout: 15m

View file

@ -0,0 +1,28 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: thanos
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: thanos-secret
creationPolicy: Owner
template:
engineVersion: v2
data:
S3_HOST: s3.hsn.dev
S3_BUCKET: "{{ .minio_thanos_bucket_name }}"
S3_ACCESS_KEY: "{{ .minio_thanos_access_key }}"
S3_SECRET_KEY: "{{ .minio_thanos_secret_key }}"
S3_REGION: us-east-1
dataFrom:
- extract:
key: Minio
rewrite:
- regexp:
source: "(.*)"
target: "minio_$1"

Some files were not shown because too many files have changed in this diff.