Compare commits

...

10 commits

13 changed files with 200 additions and 15 deletions

View file

@@ -16,6 +16,7 @@ spec:
       memory: 3Gi
   args:
     - "--proactor_threads=4"
+    - "--default_lua_flags=allow-undeclared-keys"
   # Need retention policy before this is enabled
   # Or add S3 details and enable retention policy on the bucket.
   # snapshot:

View file

@@ -31,7 +31,7 @@ spec:
     app:
       image:
         repository: ghcr.io/onedr0p/prowlarr-develop
-        tag: 1.15.0.4361@sha256:0b6b55c6b230da58c73dce53976c3ff866cab099eefc9d5a1e85bf6b1c610ae1
+        tag: 1.15.0.4361@sha256:833d7ca0c25e6ad28b48c0624fc54947bdaa724bba1a25c85b21d8484a96db3c
       env:
         # Ref: https://github.com/Radarr/Radarr/issues/7030#issuecomment-1039689518
         # Ref: https://github.com/dotnet/runtime/issues/9336

View file

@@ -0,0 +1,29 @@
+---
+# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
+apiVersion: helm.toolkit.fluxcd.io/v2beta2
+kind: HelmRelease
+metadata:
+  name: intel-device-plugin-operator
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: intel-device-plugins-operator
+      version: 0.29.0
+      sourceRef:
+        kind: HelmRepository
+        name: intel
+        namespace: flux-system
+  install:
+    crds: CreateReplace
+    remediation:
+      retries: 3
+  upgrade:
+    cleanupOnFail: true
+    crds: CreateReplace
+    remediation:
+      strategy: rollback
+      retries: 3
+  dependsOn:
+    - name: node-feature-discovery
+      namespace: kube-system
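A quick way to confirm the operator landed, assuming the CRD name shipped by the upstream intel-device-plugins-operator chart (not shown in this diff):

```sh
# CRD name assumed from the upstream operator chart.
kubectl get crd gpudeviceplugins.deviceplugin.intel.com
# The HelmRelease itself should report Ready once reconciled.
flux get helmrelease intel-device-plugin-operator -n kube-system
```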

View file

@@ -0,0 +1,6 @@
+---
+# yaml-language-server: $schema=https://json.schemastore.org/kustomization
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - ./helmrelease.yaml

View file

@@ -0,0 +1,31 @@
+---
+# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
+apiVersion: helm.toolkit.fluxcd.io/v2beta2
+kind: HelmRelease
+metadata:
+  name: intel-device-plugin-gpu
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: intel-device-plugins-gpu
+      version: 0.29.0
+      sourceRef:
+        kind: HelmRepository
+        name: intel
+        namespace: flux-system
+  install:
+    remediation:
+      retries: 3
+  upgrade:
+    cleanupOnFail: true
+    remediation:
+      strategy: rollback
+      retries: 3
+  dependsOn:
+    - name: intel-device-plugin-operator
+      namespace: kube-system
+  values:
+    name: intel-gpu-plugin
+    sharedDevNum: 3
+    nodeFeatureRule: true
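With sharedDevNum: 3, up to three containers can claim each physical GPU; the plugin advertises it as the gpu.intel.com/i915 resource. A hypothetical smoke-test pod (name and image are illustrative, not from this repo):

```sh
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: i915-smoke-test   # hypothetical name
  namespace: kube-system
spec:
  restartPolicy: Never
  containers:
    - name: test
      image: alpine:3.19
      command: ["ls", "-l", "/dev/dri"]  # device nodes appear when the GPU is mounted
      resources:
        limits:
          gpu.intel.com/i915: "1"
EOF
```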

View file

@@ -0,0 +1,6 @@
+---
+# yaml-language-server: $schema=https://json.schemastore.org/kustomization
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - ./helmrelease.yaml

View file

@@ -0,0 +1,42 @@
+---
+# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: &app intel-device-plugin
+  namespace: flux-system
+spec:
+  targetNamespace: kube-system
+  commonMetadata:
+    labels:
+      app.kubernetes.io/name: *app
+  path: ./kubernetes/apps/kube-system/intel-device-plugin/app
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: homelab
+  wait: false
+  interval: 30m
+  retryInterval: 1m
+  timeout: 5m
+---
+# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: &app intel-device-plugin-gpu
+  namespace: flux-system
+spec:
+  targetNamespace: kube-system
+  commonMetadata:
+    labels:
+      app.kubernetes.io/name: *app
+  path: ./kubernetes/apps/kube-system/intel-device-plugin/gpu
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: homelab
+  wait: false
+  interval: 30m
+  retryInterval: 1m
+  timeout: 5m
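Once committed, the two new Flux Kustomizations can be nudged and inspected from the CLI (the flux CLI defaults to the flux-system namespace, matching the manifests above):

```sh
flux reconcile kustomization intel-device-plugin --with-source
flux get kustomization intel-device-plugin-gpu
```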

View file

@ -10,6 +10,7 @@ resources:
- ./descheduler/ks.yaml - ./descheduler/ks.yaml
- ./fstrim/ks.yaml - ./fstrim/ks.yaml
- ./metrics-server/ks.yaml - ./metrics-server/ks.yaml
- ./intel-device-plugin/ks.yaml
- ./nvidia-device-plugin/ks.yaml - ./nvidia-device-plugin/ks.yaml
- ./node-feature-discovery/ks.yaml - ./node-feature-discovery/ks.yaml
- ./reloader/ks.yaml - ./reloader/ks.yaml

View file

@@ -59,6 +59,10 @@ spec:
           secretKeyRef:
             name: immich-pguser-immich
             key: uri
+    pod:
+      nodeSelector:
+        nvidia.com/gpu.present: "true"
+      runtimeClassName: nvidia
     service:
       app:
         controller: immich-machine-learning

View file

@@ -62,6 +62,10 @@ spec:
           secretKeyRef:
             name: immich-pguser-immich
             key: uri
+    pod:
+      nodeSelector:
+        nvidia.com/gpu.present: "true"
+      runtimeClassName: nvidia
     service:
       app:
         controller: immich-microservices
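Both Immich workloads are pinned the same way; this assumes the nvidia.com/gpu.present label (set by NFD / NVIDIA GPU feature discovery) and an existing nvidia RuntimeClass. A quick check:

```sh
kubectl get nodes -l nvidia.com/gpu.present=true
kubectl get runtimeclass nvidia
```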

View file

@@ -7,8 +7,27 @@
 ```sh
 omnictl cluster template sync -f ./template.yaml --omniconfig ./omniconfig.yaml
 ```
+## CNI
 
-## Flux
+### Install Cilium
+```sh
+cilium install \
+  --helm-set=ipam.mode=kubernetes \
+  --helm-set=kubeProxyReplacement=true \
+  --helm-set=securityContext.capabilities.ciliumAgent="{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}" \
+  --helm-set=securityContext.capabilities.cleanCiliumState="{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}" \
+  --helm-set=cgroup.autoMount.enabled=false \
+  --helm-set=cgroup.hostRoot=/sys/fs/cgroup \
+  --helm-set=k8sServiceHost=127.0.0.1 \
+  --helm-set=k8sServicePort=7445 \
+  --helm-set=bgpControlPlane.enabled=true \
+  --helm-set=bgp.enabled=false \
+  --helm-set=bgp.announce.loadbalancerIP=true \
+  --helm-set=bgp.announce.podCIDR=false
+```
+## Flux Prep
 
 ### Install Flux
@@ -25,10 +44,15 @@ sops --decrypt kubernetes/bootstrap/flux/age-key.sops.yaml | kubectl apply -f -
 sops --decrypt kubernetes/bootstrap/flux/git-deploy-key.sops.yaml | kubectl apply -f -
 sops --decrypt kubernetes/flux/vars/cluster-secrets.sops.yaml | kubectl apply -f -
 kubectl apply -f kubernetes/flux/vars/cluster-settings.yaml
+kubectl apply -k kubernetes/bootstrap/kps-crds/
 ```
 
-### Kick off Flux applying this repository
+## Wipe Rook Ceph
+```sh
+kubectl apply -f kubernetes/tools/wiperook.yaml
+```
+## Kick off Flux applying this repository
 
 ```sh
 kubectl apply --server-side --kustomize ./kubernetes/flux/config
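A sanity check after the Cilium install and the Flux kick-off above, using commands both CLIs provide:

```sh
cilium status --wait              # waits until the agent and operator report OK
flux get kustomizations --watch   # follow reconciliation as the repo applies
```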

View file

@@ -0,0 +1,37 @@
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: disk-wipe-one
+  namespace: rook-ceph
+spec:
+  restartPolicy: Never
+  nodeName: talos-ltk-p4a
+  containers:
+    - name: disk-wipe
+      image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
+      securityContext:
+        privileged: true
+      resources: {}
+      env:
+        - name: CEPH_DISK
+          value: "/dev/xvdb"
+      command:
+        [
+          "/bin/sh",
+          "-c"
+        ]
+      args:
+        - apk add --no-cache sgdisk util-linux parted;
+          sgdisk --zap-all $CEPH_DISK;
+          blkdiscard $CEPH_DISK;
+          dd if=/dev/zero bs=1M count=1000 oflag=direct of=$CEPH_DISK;
+          partprobe $CEPH_DISK;
+      volumeMounts:
+        - mountPath: /mnt/host_var
+          name: host-var
+  volumes:
+    - name: host-var
+      hostPath:
+        path: /var
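A possible run-and-cleanup flow for the wipe pod, reusing the path from the README change above (the wait timeout is arbitrary):

```sh
kubectl apply -f kubernetes/tools/wiperook.yaml
kubectl -n rook-ceph wait pod/disk-wipe-one \
  --for=jsonpath='{.status.phase}'=Succeeded --timeout=5m
kubectl -n rook-ceph logs disk-wipe-one   # confirm the wipe steps ran
kubectl -n rook-ceph delete pod disk-wipe-one
```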

View file

@@ -2,11 +2,11 @@
 apiVersion: v1
 kind: Pod
 metadata:
-  name: disk-wipe-nessa
-  namespace: rook-ceph
+  name: disk-wipe-s01
+  namespace: kube-system
 spec:
   restartPolicy: Never
-  nodeName: talos-nz9-4fz
+  nodeName: talos-0ye-ep1
   containers:
     - name: disk-wipe
       image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
@@ -39,11 +39,11 @@ spec:
 apiVersion: v1
 kind: Pod
 metadata:
-  name: disk-wipe-nienna
-  namespace: rook-ceph
+  name: disk-wipe-anduril
+  namespace: kube-system
 spec:
   restartPolicy: Never
-  nodeName: talos-9c9-dj0
+  nodeName: talos-9pw-zvs
   containers:
     - name: disk-wipe
       image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
@@ -52,7 +52,7 @@ spec:
       resources: {}
       env:
         - name: CEPH_DISK
-          value: "/dev/xvdb"
+          value: "/dev/nvme1n1"
       command:
         [
           "/bin/sh",
@@ -76,11 +76,11 @@ spec:
 apiVersion: v1
 kind: Pod
 metadata:
-  name: disk-wipe-orome
-  namespace: rook-ceph
+  name: disk-wipe-g01
+  namespace: kube-system
 spec:
   restartPolicy: Never
-  nodeName: talos-dz9-5ys
+  nodeName:
   containers:
     - name: disk-wipe
       image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
@@ -89,7 +89,7 @@ spec:
      resources: {}
       env:
         - name: CEPH_DISK
-          value: "/dev/xvdb"
+          value: "/dev/nvme0n1"
       command:
         [