Merge pull request 'feat: network-overhaul' (#1038) from network-overhaul into main

Reviewed-on: #1038
This commit is contained in:
Joseph Hanson 2025-01-01 01:42:13 -06:00
commit 3a7d25d1e6
24 changed files with 459 additions and 54 deletions

3
.gitignore vendored
View file

@ -27,3 +27,6 @@ config.xml
# syncthing
**/*sync-conflict*
# Aider
.aider*

View file

@ -2,6 +2,30 @@
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
# Renders `autobrr-db-secret` with autobrr's PostgreSQL connection settings.
# Every templated value below is read from the fields of the item named in
# `dataFrom.extract.key`, so that key must point at autobrr's own database user.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: autobrr-db
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: onepassword-connect
  target:
    name: autobrr-db-secret
    template:
      engineVersion: v2
      data:
        AUTOBRR__DATABASE_TYPE: postgres
        AUTOBRR__POSTGRES_DATABASE: "{{ .dbname }}"
        # `index` is required because the field name contains a hyphen.
        AUTOBRR__POSTGRES_HOST: "{{ index . \"pgbouncer-host\" }}"
        AUTOBRR__POSTGRES_USER: "{{ .user }}"
        AUTOBRR__POSTGRES_PASS: "{{ .password }}"
        AUTOBRR__POSTGRES_PORT: "{{ .port }}"
  dataFrom:
    - extract:
        # Must reference autobrr's credentials; pointing at another app's item
        # (e.g. sonarr-anime) would hand autobrr that app's dbname/user/password.
        # NOTE(review): assumes the item follows the `postgres-pguser-<user>`
        # naming - confirm it exists in the store before merging.
        key: postgres-pguser-autobrr
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: autobrr
spec:
@ -13,12 +37,6 @@ spec:
template:
engineVersion: v2
data:
AUTOBRR__DATABASE_TYPE: postgres
AUTOBRR__POSTGRES_DATABASE: autobrr
AUTOBRR__POSTGRES_HOST: "postgres-primary-real.database.svc"
AUTOBRR__POSTGRES_USER: "{{ .AUTOBRR_POSTGRES_USER }}"
AUTOBRR__POSTGRES_PASS: "{{ .AUTOBRR_POSTGRES_PASSWORD }}"
AUTOBRR__POSTGRES_PORT: "5432"
AUTOBRR__SESSION_SECRET: "{{ .AUTOBRR_SESSION_SECRET }}"
dataFrom:
- extract:

View file

@ -40,6 +40,8 @@ spec:
envFrom:
- secretRef:
name: autobrr-secret
- secretRef:
name: autobrr-db-secret
probes:
liveness: &probes
enabled: true

View file

@ -1,3 +1,4 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/cilium/cilium/refs/heads/main/install/kubernetes/cilium/values.schema.json
---
autoDirectNodeRoutes: true
bandwidthManager:
@ -15,7 +16,6 @@ cluster:
name: theshire
cni:
exclusive: false
# devices: eno+|enp+
enableRuntimeDeviceDetection: true
endpointRoutes:
enabled: true
@ -25,7 +25,7 @@ envoy:
enable: false
ipam:
mode: kubernetes
ipv4NativeRoutingCIDR: 10.244.0.0/16
ipv4NativeRoutingCIDR: 10.3.0.0/16
k8sServiceHost: 127.0.0.1
k8sServicePort: 7445
kubeProxyReplacement: true

View file

@ -6,9 +6,7 @@ metadata:
name: l2-policy
spec:
loadBalancerIPs: true
# interfaces: ["^enp.*|^eth.*|^ens.*|^eno.*"]
interfaces: ["^eno+|^enp+|^bond+"]
# interfaces: ["^bond+"]
nodeSelector:
matchLabels:
kubernetes.io/os: linux
@ -21,6 +19,4 @@ metadata:
spec:
allowFirstLastIPs: "Yes"
blocks:
- # Controller VIP: 192.168.42.120
start: 10.1.1.30
stop: 10.1.1.49
- cidr: "10.5.0.0/16"

View file

@ -6,7 +6,7 @@ serviceAccount:
create: true
service:
name: kube-dns
clusterIP: 10.96.0.10
clusterIP: 10.4.0.10
servers:
- zones:
- zone: .

View file

@ -0,0 +1,20 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
# ExternalSecret that builds the `external-dns-unifi-secret` target from the
# `external-dns-unifi` key in the `onepassword-connect` ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: external-dns-unifi
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: external-dns-unifi-secret
template:
engineVersion: v2
data:
# Re-key the extracted `username` / `password` fields to the names that
# consumers of this secret look up.
EXTERNAL_DNS_UNIFI_USER: "{{ .username }}"
EXTERNAL_DNS_UNIFI_PASS: "{{ .password }}"
dataFrom:
- extract:
key: external-dns-unifi

View file

@ -0,0 +1,72 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
# Flux HelmRelease for the `external-dns` chart (1.15.0) configured with the
# `webhook` provider, using the external-dns-unifi-webhook image as the
# provider webhook container.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app external-dns-unifi
spec:
interval: 30m
chart:
spec:
chart: external-dns
version: 1.15.0
sourceRef:
kind: HelmRepository
name: external-dns
namespace: flux-system
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
strategy: rollback
retries: 3
values:
fullnameOverride: *app
# logLevel: debug
provider:
name: webhook
webhook:
image:
repository: ghcr.io/kashalls/external-dns-unifi-webhook
# Tag is pinned together with its image digest.
tag: v0.3.4@sha256:28dc00c7a21f9571d43181fcc0dd3de59e291741f27bc075d7e06378876b2974
env:
- name: UNIFI_HOST
value: https://10.33.44.1
# Credentials are read from the secret anchored as `&secret` below; the
# same anchor is reused for UNIFI_PASS and the reload annotation.
- name: UNIFI_USER
valueFrom:
secretKeyRef:
name: &secret external-dns-unifi-secret
key: EXTERNAL_DNS_UNIFI_USER
- name: UNIFI_PASS
valueFrom:
secretKeyRef:
name: *secret
key: EXTERNAL_DNS_UNIFI_PASS
# NOTE(review): webhook log level is fixed at "debug" - confirm this is
# intended beyond the initial rollout.
- name: LOG_LEVEL
value: "debug"
livenessProbe:
httpGet:
path: /healthz
port: http-webhook
initialDelaySeconds: 10
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /readyz
port: http-webhook
initialDelaySeconds: 10
timeoutSeconds: 5
extraArgs:
- --ignore-ingress-tls-spec
triggerLoopOnEvent: true
policy: sync
# Records are sourced from Ingress and Service objects, limited to the
# `theshire.internal` domain.
sources: ["ingress", "service"]
txtOwnerId: theshire
txtPrefix: k8s.theshire.
domainFilters: ["theshire.internal"]
serviceMonitor:
enabled: true
podAnnotations:
# Presumably consumed by Stakater Reloader to restart pods when the secret
# changes - confirm Reloader is deployed in the cluster.
secret.reloader.stakater.com/reload: *secret

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Kustomize entry point for this directory: bundles the ExternalSecret and the
# HelmRelease manifests listed under `resources`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./helmrelease.yaml

View file

@ -44,3 +44,25 @@ spec:
wait: true
dependsOn:
- name: external-secrets-stores
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that applies ./kubernetes/apps/network/external-dns/unifi
# into the `network` namespace once `external-secrets-stores` is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app external-dns-unifi
namespace: flux-system
spec:
targetNamespace: network
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: external-secrets-stores
path: ./kubernetes/apps/network/external-dns/unifi
# NOTE(review): pruning is disabled, so resources removed from Git are
# presumably left in the cluster - confirm this is deliberate.
prune: false
sourceRef:
kind: GitRepository
name: theshire
wait: true
interval: 30m
timeout: 5m

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
# Kustomize entry point for this directory: includes the Namespace manifest and
# the Flux Kustomization definitions in ./tailscale/ks.yaml.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./namespace.yaml
- ./tailscale/ks.yaml

View file

@ -0,0 +1,7 @@
---
# Namespace for the tailscale workloads.
apiVersion: v1
kind: Namespace
metadata:
name: tailscale
annotations:
# Presumably keeps Flux from garbage-collecting the namespace when pruning -
# confirm against the Flux kustomize-controller documentation.
kustomize.toolkit.fluxcd.io/prune: disabled

View file

@ -0,0 +1,26 @@
---
# Tailscale ProxyClass named `kernel-org-tun`: customises the proxy StatefulSet
# pods so the tailscale container is limited to one `kernel.org/tun` resource
# and runs with a reduced capability set.
apiVersion: tailscale.com/v1alpha1
kind: ProxyClass
metadata:
name: kernel-org-tun
spec:
statefulSet:
pod:
tailscaleContainer:
resources:
limits:
# Presumably an extended resource advertised by a device plugin on the
# nodes - confirm one is installed, otherwise pods cannot schedule.
kernel.org/tun: 1
securityContext:
allowPrivilegeEscalation: false
runAsUser: 0
capabilities:
# Drop everything, then add back only the two networking capabilities.
drop: ["ALL"]
add: ["NET_ADMIN", "NET_RAW"]
seccompProfile: { type: Unconfined }
tailscaleInitContainer:
securityContext:
# The init container, unlike the main container, runs fully privileged.
privileged: true
tolerations:
# Allows scheduling onto control-plane nodes carrying the NoSchedule taint.
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule

View file

@ -0,0 +1,15 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
# ExternalSecret that extracts the `k8s-operator-oauth` key from the
# `onepassword-connect` ClusterSecretStore into `k8s-operator-oauth-secret`.
# No template is defined, so the extracted fields are not re-keyed here.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: k8s-operator-oauth
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: k8s-operator-oauth-secret
dataFrom:
- extract:
key: k8s-operator-oauth

View file

@ -0,0 +1,40 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
# Flux Kustomization applying the operator manifests into the `tailscale`
# namespace.
# NOTE(review): no `dependsOn` is declared; if the operator path contains an
# ExternalSecret, a dependency on `external-secrets-stores` may be needed -
# confirm.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app tailscale-operator
namespace: flux-system
spec:
targetNamespace: tailscale
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/apps/tailscale/tailscale/operator
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: false
interval: 30m
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
# Flux Kustomization applying the cluster-level tailscale manifests; it is
# ordered after `tailscale-operator` via `dependsOn`.
# The `&app` anchor is redefined here because anchors do not carry across `---`.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app tailscale-cluster
namespace: flux-system
spec:
targetNamespace: tailscale
commonMetadata:
labels:
app.kubernetes.io/name: *app
dependsOn:
- name: tailscale-operator
path: ./kubernetes/apps/tailscale/tailscale/cluster
prune: true
sourceRef:
kind: GitRepository
name: theshire
wait: false
interval: 30m

View file

@ -0,0 +1,35 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2.json
# Flux HelmRelease installing the `tailscale-operator` chart (1.78.3) from the
# `tailscale` HelmRepository into the `tailscale` namespace.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: tailscale-operator
namespace: tailscale
spec:
interval: 30m
chart:
spec:
chart: tailscale-operator
version: 1.78.3
sourceRef:
kind: HelmRepository
name: tailscale
namespace: flux-system
interval: 30m
# CRDs are created or replaced on both install and upgrade.
install:
crds: CreateReplace
upgrade:
crds: CreateReplace
values:
operatorConfig:
# Given as a list here, whereas proxyConfig.defaultTags below is a single
# string - NOTE(review): verify both shapes against the chart's values.
defaultTags:
- "tag:k8s-operator-theshire"
hostname: theshire
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
proxyConfig:
defaultTags: "tag:k8s-theshire"
apiServerProxyConfig:
# Quoted on purpose so the value stays the string "true", not a YAML boolean.
mode: "true"

View file

@ -0,0 +1,100 @@
<#
.SYNOPSIS
    Installs or updates the helm-diff plugin binary for Helm on Windows.

.DESCRIPTION
    Downloads the latest helm-diff Windows release archive, extracts
    diff/bin/diff.exe from it and places the binary in the helm-diff plugin
    directory under %APPDATA%. If a binary is already present it is replaced
    (update path) and the script exits with code 0; otherwise the plugin
    directory is created first (install path).

    Exit code is 1 when the download, extraction or file move fails.

.PARAMETER Verbose
    Emit progress messages through Write-Verbose.
#>
param(
    [switch]$Verbose
)

if ($Verbose) {
    $VerbosePreference = "Continue"
}

$pluginPath = "$env:APPDATA\helm\plugins\helm-diff\bin"
$binaryPath = "$pluginPath\diff.exe"
$latestRelease = "https://github.com/databus23/helm-diff/releases/latest/download/helm-diff-windows-amd64.tgz"
$tempFile = "$env:TEMP\helm-diff.tgz"

# Downloads the release archive and moves diff.exe into $pluginPath.
# Shared by the update and install paths; exits the script with code 1 on
# failure. $ExtractLabel only changes the wording of the verbose message.
function Install-HelmDiffBinary {
    param(
        [string]$ExtractLabel = "binary"
    )

    try {
        Write-Verbose "Downloading latest release..."
        try {
            Invoke-WebRequest -Uri $latestRelease -OutFile $tempFile -ErrorAction Stop
        }
        catch {
            Write-Error "Failed to download helm-diff: $_"
            exit 1
        }

        Write-Verbose "Extracting $ExtractLabel..."
        try {
            tar -xf $tempFile -C $pluginPath "diff/bin/diff.exe"
            # tar is a native command: a failure only sets $LASTEXITCODE and
            # never throws, so it has to be checked explicitly.
            if ($LASTEXITCODE -ne 0) {
                throw "tar exited with code $LASTEXITCODE"
            }
            # -ErrorAction Stop turns non-terminating cmdlet errors into
            # exceptions so the catch block below actually sees them.
            Move-Item "$pluginPath\diff\bin\diff.exe" $pluginPath -Force -ErrorAction Stop
            Remove-Item -Path "$pluginPath\diff" -Recurse -Force -ErrorAction Stop
        }
        catch {
            Write-Error "Failed to extract and move binary: $_"
            exit 1
        }
    }
    finally {
        # Runs on success and on the failure exits above, so the downloaded
        # archive is never left behind in %TEMP%.
        Write-Verbose "Cleaning up temporary files..."
        Remove-Item $tempFile -ErrorAction SilentlyContinue
    }
}

# Replaces an existing helm-diff binary with the latest release.
# Exits the script with 0 after a successful update and 1 on failure; returns
# normally (without exiting) when no existing installation is found so the
# caller can continue with a fresh install.
function Update-HelmDiff {
    try {
        Write-Verbose "Checking for existing installation..."
        if (-not (Test-Path $binaryPath)) {
            Write-Verbose "No existing installation found. Proceeding with new installation."
            return
        }

        Write-Host "Found existing helm-diff installation. Updating..."
        Install-HelmDiffBinary -ExtractLabel "new binary"

        Write-Host "helm-diff successfully updated at: $binaryPath"
        helm diff version
        exit 0
    }
    catch {
        Write-Error "Update failed: $_"
        exit 1
    }
}

# Main script execution
Update-HelmDiff

# Only reached when no existing installation was found.
Write-Verbose "Installing helm-diff..."
try {
    New-Item -ItemType Directory -Force -Path $pluginPath -ErrorAction Stop | Out-Null
    Install-HelmDiffBinary -ExtractLabel "binary"

    Write-Host "helm-diff successfully installed at: $binaryPath"
    helm diff version
}
catch {
    Write-Error "Installation failed: $_"
    exit 1
}

View file

@ -2,7 +2,7 @@
## Prerequisites
```sh
brew install helmfile
mise use helm helmfile
helm plugin install https://github.com/databus23/helm-diff
```
@ -11,14 +11,19 @@ helm plugin install https://github.com/databus23/helm-diff
### Bootstrap talos cluster
```sh
talosctl apply-config --nodes=10.1.1.61 --file=./kubernetes/bootstrap/talos/clusterconfig/homelab-shadowfax.yaml --insecure
talosctl bootstrap --nodes=10.1.1.61
talosctl apply-config --nodes=frodo --file=./kubernetes/bootstrap/talos/clusterconfig/theshire-frodo.yaml --insecure
talosctl apply-config --nodes=bilbo --file=./kubernetes/bootstrap/talos/clusterconfig/theshire-bilbo.yaml --insecure
talosctl apply-config --nodes=sam --file=./kubernetes/bootstrap/talos/clusterconfig/theshire-sam.yaml --insecure
talosctl apply-config --nodes=merry --file=./kubernetes/bootstrap/talos/clusterconfig/theshire-merry.yaml --insecure
talosctl apply-config --nodes=pippin --file=./kubernetes/bootstrap/talos/clusterconfig/theshire-pippin.yaml --insecure
talosctl apply-config --nodes=rosie --file=./kubernetes/bootstrap/talos/clusterconfig/theshire-rosie.yaml --insecure
talosctl bootstrap --nodes=frodo
```
## CNI & Container Proxy
### Install Cilium & Spegel
### Install Cilium, csr-approver, coredns, and Prometheus CRDs.
```sh
helmfile apply -f kubernetes/bootstrap/talos/apps/helmfile.yaml
helmfile apply -f kubernetes/bootstrap/helmfile.yaml
```
## Flux Prep
@ -35,7 +40,6 @@ _These cannot be applied with `kubectl` in the regular fashion due to be encrypt
```sh
sops --decrypt kubernetes/bootstrap/flux/age-key.sops.yaml | kubectl apply -f -
sops --decrypt kubernetes/bootstrap/flux/git-deploy-key.sops.yaml | kubectl apply -f -
sops --decrypt kubernetes/flux/vars/cluster-secrets.sops.yaml | kubectl apply -f -
kubectl apply -f kubernetes/flux/vars/cluster-settings.yaml
```

View file

@ -6,13 +6,16 @@ clusterName: theshire
talosVersion: v1.9.0
# renovate: datasource=docker depName=ghcr.io/siderolabs/kubelet
kubernetesVersion: 1.31.4
endpoint: "https://10.1.1.57:6444"
additionalApiServerCertSans:
- 10.1.1.57
additionalMachineCertSans:
- 10.1.1.57
endpoint: "https://10.1.1.8:6443"
additionalApiServerCertSans: &sans
- 10.1.1.8 # VIP
- 10.1.1.57 # haproxy loadbalancer
additionalMachineCertSans: *sans
clusterPodNets:
- "10.3.0.0/16"
clusterSvcNets:
- "10.4.0.0/16"
## Cilium LBPool CIDR 10.1.1.30-49 --> 10.5.0.0/16
nodes:
- hostname: bilbo
@ -24,6 +27,8 @@ nodes:
networkInterfaces:
- interface: eno1
dhcp: true
vip:
ip: 10.1.1.8
schematic:
customization:
extraKernelArgs:
@ -41,6 +46,8 @@ nodes:
networkInterfaces:
- interface: eno1
dhcp: true
vip:
ip: 10.1.1.8
schematic:
customization:
extraKernelArgs:
@ -58,6 +65,8 @@ nodes:
networkInterfaces:
- interface: eno1
dhcp: true
vip:
ip: 10.1.1.8
schematic:
customization:
extraKernelArgs:
@ -130,6 +139,19 @@ nodes:
- siderolabs/intel-ucode
worker:
patches:
# registries
- &registries |-
machine:
registries:
config:
registry-1.docker.io:
auth:
username: ${dockerUsername}
password: ${dockerPassword}
docker.io:
auth:
username: ${dockerUsername}
password: ${dockerPassword}
# hugepages
- &hugepages |-
machine:
@ -215,6 +237,8 @@ worker:
- 10.1.1.1
controlPlane:
patches:
# registries
- *registries
# hugepages
- *hugepages
# Kubelet local mount

View file

@ -1,4 +1,5 @@
CLUSTER: ENC[AES256_GCM,data:umawZ1n1Sdc=,iv:NUC2lO+edizITkQYC2YtVYQkesPWDj7drFyyaHoyiKI=,tag:bmWfCArxFM9BCdZZgoFzMA==,type:str]
dockerUsername: ENC[AES256_GCM,data:sBsEj/3eJFw=,iv:5kQMyDd20yZFyrh6T353zHXWbdgZb5uN+4D+6Hi/65s=,tag:29Pa5t0gL1/aBHHCnQi9zA==,type:str]
dockerPassword: ENC[AES256_GCM,data:SpFVfXto2aGMU9ZBEbO3Y9viF89xbG2tz+Fw09JgEAiMGlm2,iv:QonP4pG0I3IXUj7CCrtcpzNYLgqpQWyCiyQp49ot7r8=,tag:HLoPTNE8wiqmHavxkcCIaQ==,type:str]
sops:
kms: []
gcp_kms: []
@ -14,8 +15,8 @@ sops:
ekxtU1hrYmJuUkN6aVFnYkdDZ3ZtWk0K86+0Wqzsp9x3I/ZYvq11xMaHS0CR9+yD
Bwp1XZnn4taDz1H93+erJ+dgnjX/STg5KvGqPJQSi7COEZ7EJEJcyA==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2024-08-18T03:36:22Z"
mac: ENC[AES256_GCM,data:fiMzhJfGfmQaJgfDh5+jagPPc51vAe9cfpi4oCIouNDjWrCCjn5ZvaXgIqc19i9ZZhfRINaVag5fZXAm/9D2IIdzyB1jmrA3noCJiJ8ex4noHmmFTrTWdM41/Gth7LCcnrFdhnVKhr50/Zv8hMhFIYwW/iMZx0s7OW2QhHuM+y4=,iv:yofGL0biVVt4kXEA2ZY6O03Rh/CLxd++kVIMFDufjpA=,tag:9Jn8u2D+72dU6XvvkzjVIw==,type:str]
lastmodified: "2025-01-01T06:58:05Z"
mac: ENC[AES256_GCM,data:9qqN464jxJnqMwvxuLOj4Fans3p7ESD9jw75MzXvmwSAJJOQOQFh/7V65L1c/9jksJ9H0pSqdDJhNLX9pmFCyI51BB4u9XmJkKTNHqcnDjbJWQctJ44G/zSn2CV3nB9oF+3JwgdN6Ja3i39yRTGv+dc38uHijASTyDOogMW1sfE=,iv:jKNx0u+1PkdbKznALlepYJwEjyjtcrvg5xWI1OoVhoE=,tag:gV8bZj0pbSpMOEkUGFI3Sg==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.8.1
version: 3.9.2

View file

@ -27,7 +27,7 @@ spec:
postgresql:
synchronous_commit: "on"
pg_hba:
- hostnossl all all 10.32.0.0/16 md5
- hostnossl all all 10.3.0.0/16 md5
- hostssl all all all md5
instances:
- name: postgres

View file

@ -2,11 +2,11 @@
apiVersion: v1
kind: Pod
metadata:
name: disk-wipe-sam
name: disk-wipe-pippin
namespace: kube-system
spec:
restartPolicy: Never
nodeName: sam
nodeName: pippin
containers:
- name: disk-wipe
image: docker.io/library/alpine:latest
@ -16,11 +16,7 @@ spec:
env:
- name: CEPH_DISK
value: "/dev/nvme0n1"
command:
[
"/bin/sh",
"-c"
]
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted;
sgdisk --zap-all $CEPH_DISK;
@ -39,11 +35,11 @@ spec:
apiVersion: v1
kind: Pod
metadata:
name: disk-wipe-frodo
name: disk-wipe-merry
namespace: kube-system
spec:
restartPolicy: Never
nodeName: frodo
nodeName: merry
containers:
- name: disk-wipe
image: docker.io/library/alpine:latest
@ -52,12 +48,8 @@ spec:
resources: {}
env:
- name: CEPH_DISK
value: "/dev/nvme0n1"
command:
[
"/bin/sh",
"-c"
]
value: "/dev/nvme1n1"
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted;
sgdisk --zap-all $CEPH_DISK;
@ -76,11 +68,11 @@ spec:
apiVersion: v1
kind: Pod
metadata:
name: disk-wipe-bilbo
name: disk-wipe-rosie
namespace: kube-system
spec:
restartPolicy: Never
nodeName: bilbo
nodeName: rosie
containers:
- name: disk-wipe
image: docker.io/library/alpine:latest
@ -91,11 +83,7 @@ spec:
- name: CEPH_DISK
value: "/dev/nvme0n1"
command:
[
"/bin/sh",
"-c"
]
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted;
sgdisk --zap-all $CEPH_DISK;

18
mise.toml Normal file
View file

@ -0,0 +1,18 @@
# mise configuration for this repository: tool versions, environment variables
# and task shortcuts.

# NOTE(review): every tool tracks "latest", so versions are not pinned and may
# differ between machines - confirm this is intended.
[tools]
helm = "latest"
helmfile = "latest"
task = "latest"
# Paths are derived from `config_root`; TALOSCONFIG is built on top of TALOSDIR.
[env]
KUBECONFIG="{{config_root}}/kubeconfig"
SOPS_AGE_KEY_FILE="{{config_root}}/age.key"
TALOSDIR="{{config_root}}/kubernetes/bootstrap/talos"
TALOSCONFIG="{{env['TALOSDIR']}}/clusterconfig/talosconfig"
[tasks.aider]
description = "My Aider workflow."
run = "aider --no-auto-commits --cache-prompts --cache-keepalive-pings 12 --no-suggest-shell-commands --watch-files"
# Runs talhelper with the env, secret and config files from TALOSDIR and writes
# the generated output to TALOSDIR/clusterconfig.
[tasks.talhelper-gen]
description = "Generate talhelper configuration for Talos"
run = "talhelper genconfig --env-file {{env['TALOSDIR']}}/talenv.sops.yaml --secret-file {{env['TALOSDIR']}}/talsecret.sops.yaml --config-file {{env['TALOSDIR']}}/talconfig.yaml --out-dir {{env['TALOSDIR']}}/clusterconfig"