This commit is contained in:
Joseph Hanson 2024-01-11 15:03:54 -06:00
commit 645ed81c88
207 changed files with 6030 additions and 0 deletions

9
.ansible-lint Normal file

@ -0,0 +1,9 @@
---
skip_list:
- yaml[line-length]
- var-naming
warn_list:
- command-instead-of-shell
- deprecated-command-syntax
- experimental
- no-changed-when

23
.editorconfig Normal file

@ -0,0 +1,23 @@
; https://editorconfig.org/
root = true
[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[{Makefile,go.mod,go.sum,*.go,.gitmodules}]
indent_style = tab
indent_size = 4
[*.md]
indent_size = 4
trim_trailing_whitespace = false
[{Dockerfile,*.bash,*.sh}]
indent_style = space
indent_size = 4

4
.envrc Normal file

@ -0,0 +1,4 @@
#shellcheck disable=SC2148,SC2155
export KUBECONFIG="$(expand_path ./kubeconfig)"
export SOPS_AGE_KEY_FILE="$(expand_path ./age.key)"
export TALOSCONFIG="$(expand_path ./talos/clusterconfig/talosconfig)"

4
.gitattributes vendored Normal file

@ -0,0 +1,4 @@
* text=auto eol=lf
*.sops.* diff=sopsdiffer
*.sops.toml linguist-language=JSON
*.yaml.j2 linguist-language=YAML

13
.gitignore vendored Normal file

@ -0,0 +1,13 @@
.DS_Store
Thumbs.db
.private/
.venv/
.terraform
*.tfvars
.decrypted~*
*.agekey
*.pub
*.key
*.pem
kubeconfig*
config.xml

23
.markdownlint.yaml Normal file

@ -0,0 +1,23 @@
---
default: true
# MD013/line-length - Line length
MD013:
# Number of characters
line_length: 240
# Number of characters for headings
heading_line_length: 80
# Number of characters for code blocks
code_block_line_length: 80
# Include code blocks
code_blocks: true
# Include tables
tables: true
# Include headings
headings: true
# Include headers (deprecated alias for headings)
headers: true
# Strict length checking
strict: false
# Stern length checking
stern: false

53
.pre-commit-config.yaml Normal file

@ -0,0 +1,53 @@
---
fail_fast: false
exclude: |
(?x)^(
docs/_assets/.*
| .*\.sops\.toml
)$
repos:
- repo: https://github.com/adrienverge/yamllint
rev: v1.32.0
hooks:
- id: yamllint
args:
- -c
- ".yamllint.yaml"
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: fix-byte-order-marker
- id: mixed-line-ending
- id: check-added-large-files
args: [--maxkb=2048]
- id: check-merge-conflict
- id: check-executables-have-shebangs
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.4
hooks:
- id: forbid-crlf
- id: forbid-tabs
- repo: https://github.com/sirosen/fix-smartquotes
rev: 0.2.0
hooks:
- id: fix-smartquotes
- repo: https://github.com/jumanjihouse/pre-commit-hooks
rev: 3.0.0
hooks:
- id: shellcheck
language: script
args: [--severity=error]
additional_dependencies: []
- repo: https://github.com/k8s-at-home/sops-pre-commit
rev: v2.1.1
hooks:
- id: forbid-secrets

@ -0,0 +1,37 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"customDatasources": {
"grafana-dashboards": {
"defaultRegistryUrlTemplate": "https://grafana.com/api/dashboards/{{packageName}}",
"format": "json",
"transformTemplates": [
"{\"releases\":[{\"version\": $string(revision)}]}"
]
}
},
"customManagers": [
{
"customType": "regex",
"description": "Process Grafana dashboards",
"fileMatch": [
"(^|/)kubernetes/.+\\.ya?ml(\\.j2)?$"
],
"matchStrings": [
"depName=\"(?<depName>\\S+)\"\\n.*?gnetId: (?<packageName>\\d+)\\n.*?revision: (?<currentValue>\\d+)"
],
"datasourceTemplate": "custom.grafana-dashboards",
"versioningTemplate": "regex:^(?<major>\\d+)$"
}
],
"packageRules": [
{
"addLabels": ["renovate/grafana-dashboard"],
"commitMessageExtra": "to revision {{newVersion}}",
"commitMessageTopic": "dashboard {{depName}}",
"matchDatasources": ["grafana-dashboards", "custom.grafana-dashboards"],
"matchUpdateTypes": ["major"],
"semanticCommitScope": "grafana-dashboards",
"semanticCommitType": "chore"
}
]
}
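
For reference, the custom manager above only matches dashboards that are annotated in a particular shape: a depName hint on one line, with the gnetId and revision keys on the following lines. A minimal sketch of a matching snippet in a kubernetes/ values file (the dashboard name and numbers here are hypothetical, not taken from this commit):

# renovate: depName="node-exporter-full"
gnetId: 1860
revision: 31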

15
.sops.yaml Normal file

@ -0,0 +1,15 @@
---
creation_rules:
- path_regex: kubernetes/.*\.sops\.ya?ml
encrypted_regex: "^(data|stringData)$"
# Valinor
age: >-
age1g786w8t40g9y29l33rfd4jqlwhrgsxsc7ped6uju60k54j0q3enql3kfve
- path_regex: .*\.sops\.(env|ini|json|toml)
# Valinor
age: >-
age1g786w8t40g9y29l33rfd4jqlwhrgsxsc7ped6uju60k54j0q3enql3kfve
- path_regex: (ansible|terraform|talos)/.*\.sops\.ya?ml
# Valinor
age: >-
age1g786w8t40g9y29l33rfd4jqlwhrgsxsc7ped6uju60k54j0q3enql3kfve
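
With these creation rules in place, any file whose path matches one of the regexes can be encrypted in place against the Valinor age recipient; a sketch, assuming a hypothetical secret path and the age key referenced by .envrc:

sops --encrypt --in-place kubernetes/apps/example/secret.sops.yaml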

@ -0,0 +1 @@
7cc8dd1959207470e1da885dcb6fda02

@ -0,0 +1,52 @@
---
# yaml-language-server: $schema=https://taskfile.dev/schema.json
version: "3"
vars:
PYTHON_BIN: python3
env:
PATH: "{{.ROOT_DIR}}/.venv/bin:$PATH"
VIRTUAL_ENV: "{{.ROOT_DIR}}/.venv"
ANSIBLE_COLLECTIONS_PATH: "{{.ROOT_DIR}}/.venv/galaxy"
ANSIBLE_ROLES_PATH: "{{.ROOT_DIR}}/.venv/galaxy/ansible_roles"
ANSIBLE_VARS_ENABLED: "host_group_vars,community.sops.sops"
tasks:
deps:
desc: Set up Ansible dependencies for the environment
cmds:
- task: .venv
run:
desc: Run an Ansible playbook for configuring a cluster
summary: |
Args:
cluster: Cluster to run command against (required)
playbook: Playbook to run (required)
prompt: Run Ansible playbook '{{.playbook}}' against the '{{.cluster}}' cluster... continue?
deps: ["deps"]
cmd: |
.venv/bin/ansible-playbook \
--inventory {{.ANSIBLE_DIR}}/{{.cluster}}/inventory/hosts.yaml \
{{.ANSIBLE_DIR}}/{{.cluster}}/playbooks/{{.playbook}}.yaml {{.CLI_ARGS}}
preconditions:
- { msg: "Argument (cluster) is required", sh: 'test -n "{{.cluster}}"' }
- { msg: "Argument (playbook) is required", sh: 'test -n "{{.playbook}}"' }
- { msg: "Venv not found", sh: "test -d {{.ROOT_DIR}}/.venv" }
- { msg: "Inventory not found", sh: "test -f {{.ANSIBLE_DIR}}/{{.cluster}}/inventory/hosts.yaml" }
- { msg: "Playbook not found", sh: "test -f {{.ANSIBLE_DIR}}/{{.cluster}}/playbooks/{{.playbook}}.yaml" }
.venv:
internal: true
cmds:
- true && {{.PYTHON_BIN}} -m venv {{.ROOT_DIR}}/.venv
- .venv/bin/python3 -m pip install --upgrade pip setuptools wheel
- .venv/bin/python3 -m pip install --upgrade --requirement {{.ANSIBLE_DIR}}/requirements.txt
- .venv/bin/ansible-galaxy install --role-file "{{.ANSIBLE_DIR}}/requirements.yaml" --force
sources:
- "{{.ANSIBLE_DIR}}/requirements.txt"
- "{{.ANSIBLE_DIR}}/requirements.yaml"
generates:
- "{{.ROOT_DIR}}/.venv/pyvenv.cfg"

@ -0,0 +1,16 @@
---
version: "3"
tasks:
init:
desc: Initialize pre-commit hooks
cmds:
- pre-commit install --install-hooks
run:
desc: Run pre-commit
cmds:
- pre-commit run --all-files
update:
desc: Update pre-commit hooks
cmds:
- pre-commit autoupdate

@ -0,0 +1,19 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "list-${rsrc}-${ts}"
namespace: "${namespace}"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: OnFailure
containers:
- name: list
image: docker.io/restic/restic:0.16.0
args: ["snapshots"]
envFrom:
- secretRef:
name: "${rsrc}-restic-secret"

@ -0,0 +1,25 @@
---
apiVersion: volsync.backube/v1alpha1
kind: ReplicationDestination
metadata:
name: "${rsrc}-${claim}-${ts}"
namespace: "${namespace}"
spec:
trigger:
manual: restore-once
restic:
repository: "${rsrc}-restic-secret"
destinationPVC: "${claim}"
copyMethod: Direct
storageClassName: ceph-block
# IMPORTANT NOTE:
# Set to the last X number of snapshots to restore from
previous: ${previous}
# OR;
# IMPORTANT NOTE:
# On bootstrap set `restoreAsOf` to the time the old cluster was destroyed.
# This will essentially prevent volsync from trying to restore a backup
# from an application that started with default data in the PVC.
# Do not restore snapshots made after the following RFC3339 Timestamp.
# date --rfc-3339=seconds (--utc)
# restoreAsOf: "2022-12-10T16:00:00-05:00"
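
As a small aid for the note above, a current UTC timestamp in the format restoreAsOf expects can be generated like this (a sketch, assuming GNU date):

date --utc +"%Y-%m-%dT%H:%M:%SZ"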

@ -0,0 +1,158 @@
---
version: "3"
x-task-vars: &task-vars
rsrc: '{{.rsrc}}'
controller: '{{.controller}}'
namespace: '{{.namespace}}'
claim: '{{.claim}}'
ts: '{{.ts}}'
kustomization: '{{.kustomization}}'
previous: '{{.previous}}'
vars:
destinationTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/ReplicationDestination.tmpl.yaml"
wipeJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/WipeJob.tmpl.yaml"
waitForJobScript: "{{.ROOT_DIR}}/.taskfiles/VolSync/wait-for-job.sh"
listJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/ListJob.tmpl.yaml"
unlockJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/UnlockJob.tmpl.yaml"
ts: '{{now | date "150405"}}'
tasks:
list:
desc: List all snapshots taken by restic for a given ReplicationSource (ex. task volsync:list rsrc=plex [namespace=default])
silent: true
cmds:
- envsubst < <(cat {{.listJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} list-{{.rsrc}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/list-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m
- kubectl -n {{.namespace}} logs job/list-{{.rsrc}}-{{.ts}} --container list
- kubectl -n {{.namespace}} delete job list-{{.rsrc}}-{{.ts}}
vars:
rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.listJobTemplate}}
unlock:
desc: Unlocks restic repository for a given ReplicationSource (ex. task volsync:unlock rsrc=plex [namespace=default])
silent: true
cmds:
- envsubst < <(cat {{.unlockJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} unlock-{{.rsrc}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/unlock-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m
- kubectl -n {{.namespace}} logs job/unlock-{{.rsrc}}-{{.ts}} --container unlock
- kubectl -n {{.namespace}} delete job unlock-{{.rsrc}}-{{.ts}}
vars:
rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.unlockJobTemplate}}
# To run backup jobs in parallel for all replicationsources:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot rsrc=$0 namespace=$1'
#
snapshot:
desc: Trigger a Restic ReplicationSource snapshot (ex. task volsync:snapshot rsrc=plex [namespace=default])
cmds:
- kubectl -n {{.namespace}} patch replicationsources {{.rsrc}} --type merge -p '{"spec":{"trigger":{"manual":"{{.ts}}"}}}'
- bash {{.waitForJobScript}} volsync-src-{{.rsrc}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/volsync-src-{{.rsrc}} --for condition=complete --timeout=120m
# TODO: Find a way to output logs
# Error from server (NotFound): jobs.batch "volsync-src-zzztest" not found
# - kubectl -n {{.namespace}} logs job/volsync-src-{{.rsrc}}
vars:
rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: kubectl -n {{.namespace}} get replicationsources {{.rsrc}}
msg: "ReplicationSource '{{.rsrc}}' not found in namespace '{{.namespace}}'"
# To run restore jobs in parallel for all replicationdestinations:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=2 -l bash -c 'task volsync:restore rsrc=$0 namespace=$1'
#
restore:
desc: Trigger a Restic ReplicationSource restore (ex. task volsync:restore rsrc=plex [namespace=default])
cmds:
- task: restore-suspend-app
vars: *task-vars
- task: restore-wipe-job
vars: *task-vars
- task: restore-volsync-job
vars: *task-vars
- task: restore-resume-app
vars: *task-vars
vars:
rsrc: '{{ or .rsrc (fail "Variable `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
# 1) Query to find the Flux Kustomization associated with the ReplicationSource (rsrc)
kustomization:
sh: |
kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
-o jsonpath="{.metadata.labels.kustomize\.toolkit\.fluxcd\.io/name}"
# 2) Query to find the Claim associated with the ReplicationSource (rsrc)
claim:
sh: |
kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
-o jsonpath="{.spec.sourcePVC}"
# 3) Query to find the controller associated with the PersistentVolumeClaim (claim)
controller:
sh: |
app=$(kubectl -n {{.namespace}} get persistentvolumeclaim {{.claim}} -o jsonpath="{.metadata.labels.app\.kubernetes\.io/name}")
if kubectl -n {{ .namespace }} get deployment.apps/$app >/dev/null 2>&1 ; then
echo "deployment.apps/$app"
else
echo "statefulset.apps/$app"
fi
previous: "{{.previous | default 2}}"
env: *task-vars
preconditions:
- sh: test -f {{.wipeJobTemplate}}
- sh: test -f {{.destinationTemplate}}
- sh: test -f {{.waitForJobScript}}
# Suspend the Flux ks and hr
restore-suspend-app:
internal: true
cmds:
- flux -n flux-system suspend kustomization {{.kustomization}}
- flux -n {{.namespace}} suspend helmrelease {{.rsrc}}
- kubectl -n {{.namespace}} scale {{.controller}} --replicas 0
- kubectl -n {{.namespace}} wait pod --for delete --selector="app.kubernetes.io/name={{.rsrc}}" --timeout=2m
env: *task-vars
# Wipe the PVC of all data
restore-wipe-job:
internal: true
cmds:
- envsubst < <(cat {{.wipeJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} wipe-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
- kubectl -n {{.namespace}} logs job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --container wipe
- kubectl -n {{.namespace}} delete job wipe-{{.rsrc}}-{{.claim}}-{{.ts}}
env: *task-vars
# Create VolSync replicationdestination CR to restore data
restore-volsync-job:
internal: true
cmds:
- envsubst < <(cat {{.destinationTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
- kubectl -n {{.namespace}} delete replicationdestination {{.rsrc}}-{{.claim}}-{{.ts}}
env: *task-vars
# Resume Flux ks and hr
restore-resume-app:
internal: true
cmds:
- flux -n {{.namespace}} resume helmrelease {{.rsrc}}
- flux -n flux-system resume kustomization {{.kustomization}}
env: *task-vars

@ -0,0 +1,19 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "unlock-${rsrc}-${ts}"
namespace: "${namespace}"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: OnFailure
containers:
- name: unlock
image: docker.io/restic/restic:0.16.0
args: ["unlock", "--remove-all"]
envFrom:
- secretRef:
name: "${rsrc}-restic-secret"

@ -0,0 +1,25 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "wipe-${rsrc}-${claim}-${ts}"
namespace: "${namespace}"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: OnFailure
containers:
- name: wipe
image: public.ecr.aws/docker/library/busybox:latest
command: ["/bin/sh", "-c", "cd /config; find . -delete"]
volumeMounts:
- name: config
mountPath: /config
securityContext:
privileged: true
volumes:
- name: config
persistentVolumeClaim:
claimName: "${claim}"

@ -0,0 +1,14 @@
#!/usr/bin/env bash
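# Wait until the pod created for the given Job shows up in the Pending phase,
# so that the follow-up kubectl wait/logs commands have a target.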
JOB_NAME=$1
NAMESPACE="${2:-default}"
[[ -z "${JOB_NAME}" ]] && echo "Job name not specified" && exit 1
while true; do
STATUS="$(kubectl -n "${NAMESPACE}" get pod -l job-name="${JOB_NAME}" -o jsonpath='{.items[*].status.phase}')"
if [ "${STATUS}" == "Pending" ]; then
break
fi
sleep 1
done
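
Usage is simply the Job name plus an optional namespace; for example, as the VolSync tasks above would call it (the job name here is a hypothetical rsrc/timestamp combination):

bash .taskfiles/VolSync/wait-for-job.sh list-plex-153045 default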

@ -0,0 +1,14 @@
#!/usr/bin/env bash
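# Wait until the pod created for the given Job shows up in the Pending phase,
# so that the follow-up kubectl wait/logs commands have a target.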
JOB_NAME=$1
NAMESPACE="${2:-default}"
[[ -z "${JOB_NAME}" ]] && echo "Job name not specified" && exit 1
while true; do
STATUS="$(kubectl -n "${NAMESPACE}" get pod -l job-name="${JOB_NAME}" -o jsonpath='{.items[*].status.phase}')"
if [ "${STATUS}" == "Pending" ]; then
break
fi
sleep 1
done

@ -0,0 +1,47 @@
---
version: "3"
tasks:
gr-sync:
desc: Sync all Flux GitRepositories
cmds:
- |
kubectl get gitrepositories --all-namespaces --no-headers | awk '{print $1, $2}' \
| xargs -P 4 -L 1 bash -c \
'kubectl -n $0 annotate gitrepository/$1 reconcile.fluxcd.io/requestedAt=$(date +%s) --field-manager=flux-client-side-apply --overwrite'
ks-sync:
desc: Sync all Flux Kustomizations
cmds:
- |
kubectl get kustomization --all-namespaces --no-headers | awk '{print $1, $2}' \
| xargs -P 4 -L 1 bash -c \
'kubectl -n $0 annotate kustomization/$1 reconcile.fluxcd.io/requestedAt="$(date +%s)" --field-manager=flux-client-side-apply --overwrite'
hr-sync:
desc: Sync all Flux HelmReleases
cmds:
- |
kubectl get helmreleases --all-namespaces --no-headers | awk '{print $1, $2}' \
| xargs -P 4 -L 1 bash -c \
'kubectl -n $0 annotate helmrelease/$1 reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite'
tf-sync:
desc: Sync Flux Terraforms
cmds:
- |
kubectl get terraforms --all-namespaces --no-headers | awk '{print $1, $2}' \
| xargs -P 4 -L 1 bash -c \
'kubectl -n $0 annotate terraform/$1 reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite'
hr-suspend:
desc: Suspend all Flux HelmReleases
cmds:
- |
flux get helmrelease --all-namespaces --no-header | awk '{print $1, $2}' \
| xargs -L 1 bash -c 'flux -n $0 suspend helmrelease $1'
hr-resume:
desc: Resume all Flux HelmReleases
cmds:
- |
flux get helmrelease --all-namespaces --no-header | awk '{print $1, $2}' \
| xargs -L 1 bash -c 'flux -n $0 resume helmrelease $1'
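
Since the root Taskfile includes this file under the flux namespace, these helpers are invoked as, for example:

task flux:ks-sync
task flux:hr-sync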

@ -0,0 +1,12 @@
---
version: "3"
tasks:
hubble:
desc: forward the hubble relay
cmds:
- cilium hubble port-forward &
hubble-ui:
desc: port-forward hubble to 8888
cmds:
- kubectl port-forward -n kube-system svc/hubble-ui 8888:80
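
With the k8s include from the root Taskfile, the UI forward would be started with, for example:

task k8s:hubble-ui

and the Hubble UI would then be reachable at http://localhost:8888.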

@ -0,0 +1,104 @@
---
version: "3"
x-task-vars: &task-vars
node: "{{.node}}"
ceph_disk: "{{.ceph_disk}}"
ts: "{{.ts}}"
jobName: "{{.jobName}}"
vars:
waitForJobScript: "../_scripts/wait-for-k8s-job.sh"
ts: '{{now | date "150405"}}'
tasks:
wipe-node-aule:
desc: Trigger a wipe of Rook-Ceph data on node "aule"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460833"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: aule
wipe-node-orome:
desc: Trigger a wipe of Rook-Ceph data on node "orome"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37645333"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: orome
wipe-node-eonwe:
desc: Trigger a wipe of Rook-Ceph data on node "eonwe"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460887"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: eonwe
wipe-node-arlen:
desc: Trigger a wipe of Rook-Ceph data on node "arlen"
cmds:
- task: wipe-disk
vars:
node: "{{.node}}"
ceph_disk: "/dev/disk/by-id/scsi-0HC_Volume_37460897"
- task: wipe-data
vars:
node: "{{.node}}"
vars:
node: arlen
wipe-disk:
desc: Wipe all remnants of rook-ceph from a given disk (ex. task rook:wipe-disk node=aule ceph_disk="/dev/nvme0n1")
silent: true
internal: true
cmds:
- envsubst < <(cat {{.wipeRookDiskJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} {{.jobName}} default
- kubectl -n default wait job/{{.jobName}} --for condition=complete --timeout=1m
- kubectl -n default logs job/{{.jobName}} --container disk-wipe
- kubectl -n default delete job {{.jobName}}
vars:
node: '{{ or .node (fail "`node` is required") }}'
ceph_disk: '{{ or .ceph_disk (fail "`ceph_disk` is required") }}'
jobName: 'wipe-disk-{{- .node -}}-{{- .ceph_disk | replace "/" "-" -}}-{{- .ts -}}'
wipeRookDiskJobTemplate: "WipeDiskJob.tmpl.yaml"
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.wipeRookDiskJobTemplate}}
wipe-data:
desc: Wipe all remnants of rook-ceph data from a given node (ex. task rook:wipe-data node=aule)
silent: true
internal: true
cmds:
- envsubst < <(cat {{.wipeRookDataJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} {{.jobName}} default
- kubectl -n default wait job/{{.jobName}} --for condition=complete --timeout=1m
- kubectl -n default logs job/{{.jobName}} --container disk-wipe
- kubectl -n default delete job {{.jobName}}
vars:
node: '{{ or .node (fail "`node` is required") }}'
jobName: "wipe-rook-data-{{- .node -}}-{{- .ts -}}"
wipeRookDataJobTemplate: "WipeRookDataJob.tmpl.yaml"
env: *task-vars
preconditions:
- sh: test -f {{.waitForJobScript}}
- sh: test -f {{.wipeRookDataJobTemplate}}

@ -0,0 +1,26 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "${jobName}"
namespace: "default"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: Never
nodeName: ${node}
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.17.3@sha256:999384960b6114496a5e4036e945141c205d064ce23b87326bd3f8d878c5a9d4
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted;
sgdisk --zap-all ${ceph_disk};
blkdiscard ${ceph_disk};
dd if=/dev/zero bs=1M count=10000 oflag=direct of=${ceph_disk};
partprobe ${ceph_disk};

@ -0,0 +1,29 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: "${jobName}"
namespace: "default"
spec:
ttlSecondsAfterFinished: 3600
template:
spec:
automountServiceAccountToken: false
restartPolicy: Never
nodeName: ${node}
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.17.3@sha256:999384960b6114496a5e4036e945141c205d064ce23b87326bd3f8d878c5a9d4
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- rm -rf /mnt/host_var/lib/rook
volumeMounts:
- mountPath: /mnt/host_var
name: host-var
volumes:
- name: host-var
hostPath:
path: /var

19
.taskfiles/rook/pod.yaml Normal file

@ -0,0 +1,19 @@
apiVersion: v1
kind: Pod
metadata:
name: my-pod
spec:
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.17.3@sha256:999384960b6114496a5e4036e945141c205d064ce23b87326bd3f8d878c5a9d4
securityContext:
privileged: true
resources: {}
command: ["/bin/sh", "-c"]
args:
- apk add --no-cache sgdisk util-linux parted e2fsprogs;
sgdisk --zap-all /dev/nvme1n1;
blkdiscard /dev/nvme1n1;
dd if=/dev/zero bs=1M count=10000 oflag=direct of=/dev/nvme1n1;
sgdisk /dev/nvme1n1;
partprobe /dev/nvme1n1;

10
.vscode/extensions.json vendored Normal file

@ -0,0 +1,10 @@
{
"recommendations": [
"mikestead.dotenv",
"redhat.ansible",
"redhat.vscode-yaml",
"signageos.signageos-vscode-sops",
"pkief.material-icon-theme",
"ms-vscode-remote.remote-ssh"
]
}

46
.vscode/settings.json vendored Normal file

@ -0,0 +1,46 @@
{
"ansible.validation.lint.arguments": "-c .ansible-lint",
"files.associations": {
"*.json5": "jsonc",
"**/ansible/**/*.yaml": "ansible",
"**/ansible/**/*.sops.yaml": "yaml",
"**/ansible/**/inventory/**/*.yaml": "yaml",
"**/kubernetes/**/*.sops.toml": "plaintext"
},
"material-icon-theme.folders.associations": {
".taskfiles": "utils",
"bootstrap": "import",
"charts": "kubernetes",
"hack": "scripts",
"repositories": "database",
"vars": "other",
// namespaces
"cert-manager": "guard",
"external-secrets": "keys",
"kube-system": "kubernetes",
"monitoring": "event",
"networking": "connection",
"rook-ceph": "dump",
},
"yaml.schemaStore.enable": true,
"yaml.schemas": {
"ansible": "ansible/**/*.yaml",
"kubernetes": "kubernetes/**/*.yaml"
},
"editor.fontFamily": "FiraCode Nerd Font",
"editor.fontLigatures": true,
"editor.bracketPairColorization.enabled": true,
"editor.guides.bracketPairs": true,
"editor.guides.bracketPairsHorizontal": true,
"editor.guides.highlightActiveBracketPair": true,
"editor.hover.delay": 1500,
"editor.stickyScroll.enabled": false,
"editor.rulers": [
100
],
"explorer.autoReveal": false,
"files.trimTrailingWhitespace": true,
"ansible.python.interpreterPath": "/usr/bin/python3",
"sops.defaults.ageKeyFile": "age.key",
"ansible.validation.lint.path": "~/projects/valinor/.venv/bin/ansible-lint"
}

29
.yamllint.yaml Normal file

@ -0,0 +1,29 @@
---
ignore: |
.ansible/
.direnv/
.private/
.vscode/
*.sops.*
ansible/roles/xanmanning.k3s/
extends: default
rules:
truthy:
allowed-values: ["true", "false", "on"]
comments:
min-spaces-from-content: 1
line-length: disable
braces:
min-spaces-inside: 0
max-spaces-inside: 1
brackets:
min-spaces-inside: 0
max-spaces-inside: 0
indentation: enable

1
README.md Normal file

@ -0,0 +1 @@
Kubernetes with Talos @ Hetzner

158
Taskfile.yaml Normal file

@ -0,0 +1,158 @@
---
version: "3"
vars:
PYTHON_BIN: python3
ANSIBLE_DIR: "{{.ROOT_DIR}}/ansible"
KUBERNETES_DIR: "{{.ROOT_DIR}}/kubernetes"
TERRAFORM_DIR: "{{.ROOT_DIR}}/terraform"
CLUSTER_SECRETS_FILE: "{{.CLUSTER_DIR}}/flux/vars/cluster-secrets.sops.env"
CLUSTER_SETTINGS_FILE: "{{.CLUSTER_DIR}}/flux/vars/cluster-settings.env"
env:
KUBECONFIG: "{{.ROOT_DIR}}/kubeconfig"
SOPS_AGE_KEY_FILE: "{{.ROOT_DIR}}/age.key"
PATH: "{{.ROOT_DIR}}/.venv/bin:$PATH"
VIRTUAL_ENV: "{{.ROOT_DIR}}/.venv"
ANSIBLE_COLLECTIONS_PATH: "{{.ROOT_DIR}}/.venv/galaxy"
ANSIBLE_ROLES_PATH: "{{.ROOT_DIR}}/.venv/galaxy/ansible_roles"
ANSIBLE_VARS_ENABLED: "host_group_vars,community.sops.sops"
K8S_AUTH_KUBECONFIG: "{{.ROOT_DIR}}/kubeconfig"
includes:
ansible: .taskfiles/Ansible/Taskfile.yaml
volsync: .taskfiles/VolSync/Tasks.yaml
precommit: .taskfiles/PreCommit/Tasks.yaml
k8s: .taskfiles/k8s/Taskfile.yaml
rook:
taskfile: ".taskfiles/rook"
dir: .taskfiles/rook
flux:
dir: .taskfiles/flux
taskfile: .taskfiles/flux
tasks:
default:
silent: true
cmds: ["task -l"]
init:
desc: Initialize workstation dependencies with Brew
cmds:
- brew install {{.DEPS}} {{.CLI_ARGS}}
preconditions:
- sh: command -v brew
msg: |
Homebrew is not installed. Using macOS, Linux or WSL?
Head over to https://brew.sh to get up and running.
vars:
DEPS: >-
age
ansible
direnv
derailed/k9s/k9s
fluxcd/tap/flux
go-task/tap/go-task
helm
ipcalc
jq
kubernetes-cli
kustomize
pre-commit
prettier
sops
stern
terraform
tflint
weaveworks/tap/gitops
yamllint
yq
configure-venv:
desc: Install or upgrade the Python virtual env
cmds:
- "{{.PYTHON_BIN}} -m venv {{.ROOT_DIR}}/.venv"
- .venv/bin/python3 -m pip install --upgrade pip setuptools wheel
- .venv/bin/python3 -m pip install --upgrade --requirement "{{.ROOT_DIR}}/requirements.txt"
- .venv/bin/ansible-galaxy install --role-file "{{.ROOT_DIR}}/requirements.yaml" --force
flux-apply:
desc: Apply a resource path that contains Flux substitution variables
dotenv: ['{{.CLUSTER_SETTINGS_FILE}}']
vars:
ks: '{{ or .ks (fail "Missing path (`ks` var)") }}'
cmd: |
sops exec-env {{.CLUSTER_SECRETS_FILE}} \
"kustomize build --load-restrictor=LoadRestrictionsNone {{.ks}} | \
envsubst | kubectl apply --server-side --field-manager=kustomize-controller -f -"
preconditions:
- sh: test -f {{.CLUSTER_SECRETS_FILE}}
- sh: test -f {{.CLUSTER_SETTINGS_FILE}}
sync-secrets:
desc: Sync ExternalSecret resources
vars:
secret: '{{ .secret | default ""}}'
namespace: '{{.namespace | default "default"}}'
cmd: |
{{if eq .secret ""}}
kubectl get externalsecret.external-secrets.io --all-namespaces --no-headers | awk '{print $1, $2}' \
| xargs --max-procs=4 -l bash -c 'kubectl -n $0 annotate externalsecret.external-secrets.io $1 force-sync=$(date +%s) --overwrite'
{{else}}
kubectl -n {{.namespace}} annotate externalsecret.external-secrets.io {{.secret}} force-sync=$(date +%s) --overwrite
{{end}}
preconditions:
- kubectl -n {{.namespace}} get externalsecret {{.secret}}
mount-volume:
desc: Mount a PersistentVolumeClaim to a temporary pod
interactive: true
vars:
claim: '{{ or .claim (fail "PersistentVolumeClaim `claim` is required") }}'
namespace: '{{.namespace | default "default"}}'
cmd: |
kubectl run -n {{.namespace}} debug-{{.claim}} -i --tty --rm --image=null --privileged --overrides='
{
"apiVersion": "v1",
"spec": {
"containers": [
{
"name": "debug",
"image": "ghcr.io/onedr0p/alpine:rolling",
"command": ["/bin/bash"],
"stdin": true,
"stdinOnce": true,
"tty": true,
"volumeMounts": [
{
"name": "config",
"mountPath": "/config"
}
]
}
],
"volumes": [
{
"name": "config",
"persistentVolumeClaim": {
"claimName": "{{.claim}}"
}
}
],
"restartPolicy": "Never"
}
}'
preconditions:
- kubectl -n {{.namespace}} get pvc {{.claim}}
# https://github.com/fluxcd/helm-controller/issues/644
"644":
cmds:
- kubectl -n {{.namespace}} delete secret -l owner=helm,name={{.release}},status=pending-upgrade
- flux -n {{.namespace}} reconcile hr {{.release}}
vars:
release: '{{ or .release (fail "HelmRelease `release` is required") }}'
namespace: '{{.namespace | default "default"}}'
preconditions:
- flux -n {{.namespace}} get hr {{.release}}
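
A few sketch invocations of the tasks above (the claim and release names are placeholders, not values from this commit):

task configure-venv
task mount-volume claim=plex-config namespace=default
task 644 release=ingress-nginx namespace=networking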

10
ansible/cilium-install.sh Normal file

@ -0,0 +1,10 @@
#!/bin/bash
cilium install \
--helm-set=ipam.mode=kubernetes \
--helm-set=kubeProxyReplacement=true \
--helm-set=k8sServiceHost=167.235.217.82 \
--helm-set=policyAuditMode=true \
--helm-set=hostFirewall.enabled=true \
--helm-set=extraConfig.allow-localhost=policy \
--helm-set=hubble.relay.enabled=true \
--helm-set=hubble.ui.enabled=true

8
ansible/main/.envrc Normal file

@ -0,0 +1,8 @@
#shellcheck disable=SC2148,SC2155
export SOPS_AGE_KEY_FILE="$(expand_path ../../age.key)"
export VIRTUAL_ENV="$(expand_path ../../.venv)"
export ANSIBLE_COLLECTIONS_PATH=$(expand_path ../../.venv/galaxy)
export ANSIBLE_ROLES_PATH=$(expand_path ../../.venv/galaxy/ansible_roles)
export ANSIBLE_VARS_ENABLED="host_group_vars,community.sops.sops"
export ANSIBLE_INVENTORY=$(expand_path ./inventory/hosts.yaml)
PATH_add "$(expand_path ../../.venv/bin)"

@ -0,0 +1,28 @@
---
# renovate: datasource=github-releases depName=k3s-io/k3s
k3s_release_version: "v1.29.0+k3s1"
k3s_install_hard_links: true
k3s_become: true
k3s_etcd_datastore: true
k3s_registration_address: 10.5.0.2
# /var/lib/rancher/k3s/server/manifests
k3s_server_manifests_urls:
# Essential Prometheus Operator CRDs (the rest are installed with the kube-prometheus-stack helm release)
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
filename: custom-prometheus-podmonitors.yaml
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
filename: custom-prometheus-prometheusrules.yaml
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml
filename: custom-prometheus-scrapeconfigs.yaml
- url: https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
filename: custom-prometheus-servicemonitors.yaml
# /var/lib/rancher/k3s/server/manifests
k3s_server_manifests_templates:
- custom-coredns-helmchart.yaml.j2
- custom-cilium-helmchart.yaml.j2
# k3s_registries:
# mirrors:
# docker.io:
# endpoint: ["http://harbor.hsn.dev/v2/docker.io"]
# ghcr.io:
# endpoint: ["http://harbor.hsn.dev/v2/ghcr.io"]

@ -0,0 +1,3 @@
---
github_username: jahanson
timezone: America/Chicago

@ -0,0 +1,25 @@
---
k3s_control_node: true
k3s_server:
cluster-cidr: 10.32.0.0/16
disable: ["coredns", "flannel", "local-storage", "metrics-server", "servicelb", "traefik"]
disable-cloud-controller: true
disable-helm-controller: false
disable-kube-proxy: true
disable-network-policy: true
docker: false
etcd-disable-snapshots: true
etcd-expose-metrics: true
flannel-backend: "none" # quote
https-listen-port: 6443
# kube-apiserver-arg: ["anonymous-auth=true"]
# kubelet-arg: ["feature-gates=ImageMaximumGCAge=true","imageMaximumGCAge=30m"]
kubelet-arg: ["image-gc-high-threshold=85","image-gc-low-threshold=80"]
kube-controller-manager-arg: ["bind-address=0.0.0.0"]
kube-scheduler-arg: ["bind-address=0.0.0.0"]
node-ip: "{{ ansible_host }}"
pause-image: registry.k8s.io/pause:3.9
secrets-encryption: true
service-cidr: 10.33.0.0/16
tls-san: ["{{ k3s_registration_address }}"]
write-kubeconfig-mode: "0644"

@ -0,0 +1,5 @@
---
k3s_control_node: false
k3s_agent:
node-ip: "{{ ansible_host }}"
pause-image: registry.k8s.io/pause:3.9

@ -0,0 +1,18 @@
---
kubernetes:
vars:
ansible_user: jahanson
ansible_ssh_port: 22
children:
master:
hosts:
galadriel:
ansible_host: 10.1.1.61
thrain:
ansible_host: 10.1.1.62
cirdan:
ansible_host: 10.1.1.63
workers:
hosts:
qbee:
ansible_host: 10.1.1.41

@ -0,0 +1,44 @@
---
- name: Add user 'jahanson' and add to sudo group
hosts: all
become: true
tasks:
- name: Create user 'jahanson'
ansible.builtin.user:
name: jahanson
state: present
- name: Add user 'jahanson' to sudo group
when: ansible_user == 'root'
ansible.builtin.user:
name: jahanson
groups: sudo
append: true
- name: User Configuration | SSH keys
ansible.posix.authorized_key:
user: "jahanson"
key: "https://github.com/jahanson.keys"
- name: User Configuration | Silence login
ansible.builtin.file:
dest: "{{ '/home/' + ansible_user if ansible_user != 'root' else '/root' }}/.hushlogin"
state: touch
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: "0644"
modification_time: preserve
access_time: preserve
- name: Copy .vimrc file
ansible.builtin.copy:
src: "files/.vimrc"
dest: "/home/jahanson/.vimrc"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: "0644"
- name: User Configuration | Add user to sudoers
ansible.builtin.copy:
content: "jahanson ALL=(ALL:ALL) NOPASSWD:ALL"
dest: "/etc/sudoers.d/jahanson"
owner: root
group: root
mode: "0440"

@ -0,0 +1,40 @@
---
- name: Reset Ceph Drives
hosts: kubernetes
become: true
gather_facts: true
any_errors_fatal: true
pre_tasks:
- name: Pausing for 2 seconds...
ansible.builtin.pause:
seconds: 2
tasks:
- name: Reset Ceph Drives # noqa: ignore-errors
ignore_errors: true
when: ceph_drives | default([]) | length > 0
block:
- name: Delete (/var/lib/rook)
ansible.builtin.file:
state: absent
path: /var/lib/rook
- name: Delete (/dev/mapper/ceph-*) # noqa: no-changed-when
ansible.builtin.shell: |
set -o pipefail
ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove_all --force % || true
- name: Delete (/dev/ceph-*) # noqa: no-changed-when
ansible.builtin.command: rm -rf /dev/ceph-*
- name: Delete (/dev/mapper/ceph--*) # noqa: no-changed-when
ansible.builtin.command: rm -rf /dev/mapper/ceph--*
- name: Wipe (sgdisk) # noqa: no-changed-when
ansible.builtin.command: "sgdisk --zap-all {{ item }}"
loop: "{{ ceph_drives }}"
- name: Wipe (dd) # noqa: no-changed-when
ansible.builtin.command: "dd if=/dev/zero of={{ item }} bs=1M count=100 oflag=direct,dsync"
loop: "{{ ceph_drives }}"
- name: Wipe (blkdiscard) # noqa: no-changed-when
ansible.builtin.command: "blkdiscard {{ item }}"
loop: "{{ ceph_drives }}"
when: "'nvme' in item"
- name: Wipe (partprobe) # noqa: no-changed-when
ansible.builtin.command: "partprobe {{ item }}"
loop: "{{ ceph_drives }}"

@ -0,0 +1,95 @@
---
- name: Cluster Installation
hosts: kubernetes
become: true
gather_facts: true
any_errors_fatal: true
pre_tasks:
- name: Pausing for 2 seconds...
ansible.builtin.pause:
seconds: 2
tasks:
- name: Check if cluster is installed
check_mode: false
ansible.builtin.stat:
path: /etc/rancher/k3s/config.yaml
register: k3s_installed
- name: Ignore manifests templates and urls if the cluster is already installed
when: k3s_installed.stat.exists
ansible.builtin.set_fact:
k3s_server_manifests_templates: []
k3s_server_manifests_urls: []
- name: Install Kubernetes
ansible.builtin.include_role:
name: xanmanning.k3s
public: true
vars:
k3s_state: installed
- name: Kubeconfig
ansible.builtin.include_tasks: tasks/kubeconfig.yaml
vars:
repository_base: "{{ lookup('ansible.builtin.pipe', 'git rev-parse --show-toplevel') }}"
- name: Wait for custom manifests to rollout
when:
- k3s_primary_control_node
- (k3s_server_manifests_templates | length > 0
or k3s_server_manifests_urls | length > 0)
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
kind: "{{ item.kind }}"
name: "{{ item.name }}"
namespace: "{{ item.namespace | default('') }}"
wait: true
wait_sleep: 10
wait_timeout: 360
loop:
- { name: cilium, kind: HelmChart, namespace: kube-system }
- { name: coredns, kind: HelmChart, namespace: kube-system }
- { name: policy, kind: CiliumL2AnnouncementPolicy }
- { name: pool, kind: CiliumLoadBalancerIPPool }
- { name: podmonitors.monitoring.coreos.com, kind: CustomResourceDefinition }
- { name: prometheusrules.monitoring.coreos.com, kind: CustomResourceDefinition }
- { name: scrapeconfigs.monitoring.coreos.com, kind: CustomResourceDefinition }
- { name: servicemonitors.monitoring.coreos.com, kind: CustomResourceDefinition }
- name: Coredns
when: k3s_primary_control_node
ansible.builtin.include_tasks: tasks/coredns.yaml
- name: Cilium
when: k3s_primary_control_node
ansible.builtin.include_tasks: tasks/cilium.yaml
- name: Cruft
when: k3s_primary_control_node
ansible.builtin.include_tasks: tasks/cruft.yaml
- name: Stale Containers
ansible.builtin.include_tasks: tasks/stale_containers.yaml
vars:
stale_containers_state: disabled
# - name: Helm controller
# notify: Restart Kubernetes
# when: k3s_control_node
# ansible.builtin.include_tasks: tasks/helm_controller.yaml
# TODO: Replace this with embedded spegel in the future
- name: Copy custom containerd configuration
notify: Restart Kubernetes
ansible.builtin.copy:
src: files/config.toml.tmpl
dest: /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
owner: root
group: root
mode: "0644"
handlers:
- name: Restart Kubernetes
ansible.builtin.systemd:
name: k3s
state: restarted

@ -0,0 +1,61 @@
---
- name: Cluster Nuke
hosts: kubernetes
become: true
gather_facts: true
any_errors_fatal: true
pre_tasks:
- name: Pausing for 2 seconds...
ansible.builtin.pause:
seconds: 2
tasks:
- name: Stop Kubernetes # noqa: ignore-errors
ignore_errors: true
block:
- name: Stop Kubernetes
ansible.builtin.include_role:
name: xanmanning.k3s
public: true
vars:
k3s_state: stopped
# https://github.com/k3s-io/docs/blob/main/docs/installation/network-options.md
- name: Networking
block:
- name: Networking | Delete Cilium links
ansible.builtin.command:
cmd: "ip link delete {{ item }}"
removes: "/sys/class/net/{{ item }}"
loop: ["cilium_host", "cilium_net", "cilium_vxlan"]
- name: Networking | Flush iptables
ansible.builtin.iptables:
table: "{{ item }}"
flush: true
loop: ["filter", "nat", "mangle", "raw"]
- name: Networking | Flush ip6tables
ansible.builtin.iptables:
table: "{{ item }}"
flush: true
ip_version: ipv6
loop: ["filter", "nat", "mangle", "raw"]
- name: Networking | Delete CNI directory
ansible.builtin.file:
path: /etc/cni/net.d
state: absent
- name: Uninstall Kubernetes
ansible.builtin.include_role:
name: xanmanning.k3s
public: true
vars:
k3s_state: uninstalled
- name: Stale Containers
ansible.builtin.include_tasks: tasks/stale_containers.yaml
vars:
stale_containers_state: disabled
- name: Reboot
ansible.builtin.reboot:
msg: Rebooting nodes
reboot_timeout: 3600

@ -0,0 +1,130 @@
---
- name: Prepare System
hosts: kubernetes
become: true
gather_facts: true
any_errors_fatal: true
pre_tasks:
- name: Pausing for 2 seconds...
ansible.builtin.pause:
seconds: 2
tasks:
- name: Locale
block:
- name: Locale | Set timezone
community.general.timezone:
name: "{{ timezone | default('Etc/UTC') }}"
- name: Packages
block:
- name: Packages | Add non-free repository
ansible.builtin.apt_repository:
repo: deb http://deb.debian.org/debian/ stable main contrib non-free
filename: non-free
update_cache: true
- name: Packages | Install Intel common packages
when: inventory_hostname == 'orome'
ansible.builtin.apt:
name: vim,i965-va-driver-shaders,apt-transport-https,ca-certificates,conntrack,curl,dirmngr,gdisk,
gnupg,hdparm,htop,btop,intel-gpu-tools,intel-media-va-driver-non-free,iperf3,iptables,iputils-ping,ipvsadm,
libseccomp2,lm-sensors,neofetch,net-tools,nfs-common,nvme-cli,open-iscsi,parted,psmisc,python3,
python3-apt,python3-openshift,python3-kubernetes,python3-yaml,smartmontools,socat,software-properties-common,
unzip,util-linux
install_recommends: false
- name: Packages | Install AMD common packages
when: inventory_hostname != 'orome'
ansible.builtin.apt:
name: vim,apt-transport-https,ca-certificates,conntrack,curl,dirmngr,gdisk,
gnupg,hdparm,htop,btop,iperf3,iptables,iputils-ping,ipvsadm,
libseccomp2,lm-sensors,neofetch,net-tools,nfs-common,nvme-cli,open-iscsi,parted,psmisc,python3,
python3-apt,python3-openshift,python3-kubernetes,python3-yaml,smartmontools,socat,software-properties-common,
unzip,util-linux
install_recommends: false
- name: Fish
block:
- name: Fish | Add fish apt key
ansible.builtin.get_url:
url: https://download.opensuse.org/repositories/shells:fish:release:3/Debian_12/Release.key
dest: /etc/apt/trusted.gpg.d/fish.asc
owner: root
group: root
mode: "0644"
- name: Fish | Add fish repository
ansible.builtin.apt_repository:
repo: deb [signed-by=/etc/apt/trusted.gpg.d/fish.asc] http://download.opensuse.org/repositories/shells:/fish:/release:/3/Debian_12/ /
filename: fish
update_cache: true
- name: Fish | Install fish
ansible.builtin.apt:
name: fish
install_recommends: false
- name: Fish | Set as default shell
ansible.builtin.user:
name: "{{ ansible_user }}"
shell: /usr/bin/fish
- name: Fish | Create configuration directory
ansible.builtin.file:
path: "{{ '/home/' + ansible_user if ansible_user != 'root' else '/root' }}/.config/fish/functions"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
recurse: true
- name: Fish | Create neofetch greeting
ansible.builtin.copy:
dest: "{{ '/home/' + ansible_user if ansible_user != 'root' else '/root' }}/.config/fish/functions/fish_greeting.fish"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: "0755"
content: neofetch --config none
- name: Fish | Create kubectl shorthand
ansible.builtin.copy:
dest: "{{ '/home/' + ansible_user if ansible_user != 'root' else '/root' }}/.config/fish/functions/k.fish"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: "0755"
content: |
function k --wraps=kubectl --description 'kubectl shorthand'
kubectl $argv
end
- name: System Configuration
notify: Reboot
block:
- name: System Configuration | Disable swap
ansible.posix.mount:
name: "{{ item }}"
fstype: swap
state: absent
loop: ["none", "swap"]
- name: System Configuration | Create Kernel modules
ansible.builtin.copy:
dest: "/etc/modules-load.d/{{ item }}.conf"
mode: "0644"
content: "{{ item }}"
loop: ["br_netfilter", "ceph", "ip_vs", "ip_vs_rr", "nbd", "overlay", "rbd", "tcp_bbr"]
register: modules_status
- name: System Configuration | Reload Kernel modules # noqa: no-changed-when no-handler
when: modules_status.changed
ansible.builtin.systemd:
name: systemd-modules-load
state: restarted
- name: System Configuration | Sysctl
ansible.posix.sysctl:
name: "{{ item.key }}"
value: "{{ item.value }}"
sysctl_file: /etc/sysctl.d/99-kubernetes.conf
reload: true
with_dict: "{{ sysctl_config }}"
vars:
sysctl_config:
fs.inotify.max_queued_events: 65536
fs.inotify.max_user_watches: 524288
fs.inotify.max_user_instances: 8192
handlers:
- name: Reboot
ansible.builtin.reboot:
msg: Rebooting nodes
reboot_timeout: 3600

@ -0,0 +1,71 @@
---
# https://github.com/kevincoakley/ansible-role-k8s-rolling-update
- name: Cluster update rollout
hosts: kubernetes
become: true
gather_facts: true
any_errors_fatal: true
serial: 1
pre_tasks:
- name: Pausing for 2 seconds...
ansible.builtin.pause:
seconds: 2
tasks:
- name: Details
ansible.builtin.command: "kubectl get node {{ inventory_hostname }} -o json"
register: kubectl_get_node
delegate_to: "{{ groups['master'][0] }}"
failed_when: false
changed_when: false
- name: Update
when:
# When status.conditions[x].type == Ready then check stats.conditions[x].status for True|False
- kubectl_get_node['stdout'] | from_json | json_query("status.conditions[?type == 'Ready'].status")
# If spec.unschedulable is defined then the node is cordoned
- not (kubectl_get_node['stdout'] | from_json).spec.unschedulable is defined
block:
- name: Cordon
kubernetes.core.k8s_drain:
name: "{{ inventory_hostname }}"
kubeconfig: /etc/rancher/k3s/k3s.yaml
state: cordon
delegate_to: "{{ groups['master'][0] }}"
- name: Drain
kubernetes.core.k8s_drain:
name: "{{ inventory_hostname }}"
kubeconfig: /etc/rancher/k3s/k3s.yaml
state: drain
delete_options:
delete_emptydir_data: true
ignore_daemonsets: true
terminate_grace_period: 600
wait_timeout: 900
pod_selectors:
- app!=rook-ceph-osd
delegate_to: "{{ groups['master'][0] }}"
- name: Update
ansible.builtin.apt:
upgrade: dist
update_cache: true
- name: Check if reboot is required
ansible.builtin.stat:
path: /var/run/reboot-required
register: reboot_required
- name: Reboot
when: reboot_required.stat.exists
ansible.builtin.reboot:
msg: Rebooting node
post_reboot_delay: 120
reboot_timeout: 3600
- name: Uncordon
kubernetes.core.k8s_drain:
name: "{{ inventory_hostname }}"
kubeconfig: /etc/rancher/k3s/k3s.yaml
state: uncordon
delegate_to: "{{ groups['master'][0] }}"
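
These plays are normally driven through the ansible include in the root Taskfile rather than called directly; a hedged example, where the playbook file name is an assumption about how the play above is saved under ansible/main/playbooks/:

task ansible:run cluster=main playbook=cluster-rollout-update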

@ -0,0 +1,2 @@
source $VIMRUNTIME/defaults.vim
set mouse-=a

@ -0,0 +1,25 @@
version = 2
[plugins."io.containerd.internal.v1.opt"]
path = "/var/lib/rancher/k3s/agent/containerd"
[plugins."io.containerd.grpc.v1.cri"]
stream_server_address = "127.0.0.1"
stream_server_port = "10010"
enable_selinux = false
enable_unprivileged_ports = true
enable_unprivileged_icmp = true
sandbox_image = "registry.k8s.io/pause:3.9"
[plugins."io.containerd.grpc.v1.cri".containerd]
snapshotter = "overlayfs"
disable_snapshot_annotations = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/var/lib/rancher/k3s/agent/etc/containerd/certs.d"

@ -0,0 +1,6 @@
[Unit]
Description=Stale containers
[Service]
Type=oneshot
ExecStart=/usr/local/bin/k3s crictl rmi --prune

@ -0,0 +1,11 @@
[Unit]
Description=Stale containers
[Timer]
OnCalendar=weekly
AccuracySec=1h
Persistent=true
RandomizedDelaySec=6000
[Install]
WantedBy=timers.target

@ -0,0 +1,56 @@
---
- name: Cilium
block:
- name: Cilium | Check if Cilium HelmChart exists
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: cilium
kind: HelmChart
namespace: kube-system
register: cilium_helmchart
- name: Cilium | Wait for Cilium to rollout
when: cilium_helmchart.resources | count > 0
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: helm-install-cilium
kind: Job
namespace: kube-system
wait: true
wait_condition:
type: Complete
status: true
wait_timeout: 360
- name: Cilium | Patch the Cilium HelmChart to unmanage it
when: cilium_helmchart.resources | count > 0
kubernetes.core.k8s_json_patch:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: cilium
kind: HelmChart
namespace: kube-system
patch:
- op: add
path: /metadata/annotations/helmcharts.helm.cattle.io~1unmanaged
value: "true"
- name: Cilium | Delete the Cilium HelmChart CR
when: cilium_helmchart.resources | count > 0
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: cilium
kind: HelmChart
namespace: kube-system
state: absent
- name: Cilium | Force delete the Cilium HelmChart
when: cilium_helmchart.resources | count > 0
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: cilium
kind: HelmChart
namespace: kube-system
state: patched
definition:
metadata:
finalizers: []

@ -0,0 +1,56 @@
---
- name: Coredns
block:
- name: Coredns | Check if Coredns HelmChart exists
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: coredns
kind: HelmChart
namespace: kube-system
register: coredns_helmchart
- name: Coredns | Wait for Coredns to rollout
when: coredns_helmchart.resources | count > 0
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: helm-install-coredns
kind: Job
namespace: kube-system
wait: true
wait_condition:
type: Complete
status: true
wait_timeout: 360
- name: Coredns | Patch the Coredns HelmChart to unmanage it
when: coredns_helmchart.resources | count > 0
kubernetes.core.k8s_json_patch:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: coredns
kind: HelmChart
namespace: kube-system
patch:
- op: add
path: /metadata/annotations/helmcharts.helm.cattle.io~1unmanaged
value: "true"
- name: Coredns | Delete the Coredns HelmChart CR
when: coredns_helmchart.resources | count > 0
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: coredns
kind: HelmChart
namespace: kube-system
state: absent
- name: Coredns | Force delete the Coredns HelmChart
when: coredns_helmchart.resources | count > 0
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: coredns
kind: HelmChart
namespace: kube-system
state: patched
definition:
metadata:
finalizers: []

@ -0,0 +1,32 @@
---
# https://github.com/k3s-io/k3s/issues/1971
- name: Cruft
block:
- name: Cruft | Get list of custom manifests
ansible.builtin.find:
paths: "{{ k3s_server_manifests_dir }}"
file_type: file
use_regex: true
patterns: ["^custom-.*"]
register: custom_manifest
- name: Cruft | Delete custom manifests
ansible.builtin.file:
path: "{{ item.path }}"
state: absent
loop: "{{ custom_manifest.files }}"
- name: Cruft | Get list of custom addons
kubernetes.core.k8s_info:
kubeconfig: /etc/rancher/k3s/k3s.yaml
kind: Addon
register: addons_list
- name: Cruft | Delete addons
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: "{{ item.metadata.name }}"
kind: Addon
namespace: kube-system
state: absent
loop: "{{ addons_list.resources | selectattr('metadata.name', 'match', '^custom-.*') | list }}"

@ -0,0 +1,16 @@
---
- name: Helm Controller
block:
- name: Helm Controller | Disable Helm controller
ansible.builtin.replace:
path: /etc/rancher/k3s/config.yaml
regexp: '^disable-helm-controller: false$'
replace: 'disable-helm-controller: true'
- name: Helm Controller | Delete Helm controller CRDs
kubernetes.core.k8s:
kubeconfig: /etc/rancher/k3s/k3s.yaml
name: "{{ item }}"
kind: CustomResourceDefinition
state: absent
loop: ["helmcharts.helm.cattle.io", "helmchartconfigs.helm.cattle.io"]

@ -0,0 +1,36 @@
---
# https://github.com/k3s-io/k3s/issues/1900
- name: Enable Stale containers
when: stale_containers_state == "enabled"
block:
- name: Stale containers | Create systemd unit
ansible.builtin.copy:
src: files/stale-containers.service
dest: /etc/systemd/system/stale-containers.service
owner: root
group: root
mode: "0644"
- name: Stale containers | Create systemd timer
ansible.builtin.copy:
src: files/stale-containers.timer
dest: /etc/systemd/system/stale-containers.timer
owner: root
group: root
mode: "0644"
- name: Stale containers | Start the systemd timer
ansible.builtin.systemd:
name: stale-containers.timer
enabled: true
daemon_reload: true
masked: false
state: started
- name: Disable Stale containers
when: stale_containers_state == "disabled"
block:
- name: Stale containers | Mask the systemd timer
ansible.builtin.systemd:
name: stale-containers.timer
masked: true

@ -0,0 +1,46 @@
---
# https://docs.k3s.io/helm
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: cilium
namespace: kube-system
spec:
# renovate: datasource=helm
repo: https://helm.cilium.io/
chart: cilium
version: 1.14.5
targetNamespace: kube-system
bootstrap: true
valuesContent: |-
cluster:
name: homelab
id: 1
containerRuntime:
integration: containerd
socketPath: /var/run/k3s/containerd/containerd.sock
hubble:
enabled: true
relay:
enabled: true
ui:
enabled: true
ipam:
mode: kubernetes
ipv4NativeRoutingCIDR: "{{ k3s_server['cluster-cidr'] }}"
k8sServiceHost: "{{ k3s_registration_address }}"
k8sServicePort: 6443
kubeProxyReplacement: true
localRedirectPolicy: true
operator:
rollOutPods: true
rollOutCiliumPods: true
securityContext:
privileged: true
policyAuditMode: true
hostFirewall:
enabled: true
extraConfig:
allow-localhost: policy

@ -0,0 +1,77 @@
---
# https://docs.k3s.io/helm
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: coredns
namespace: kube-system
spec:
# renovate: datasource=helm
repo: https://coredns.github.io/helm
chart: coredns
version: 1.29.0
targetNamespace: kube-system
bootstrap: true
valuesContent: |-
fullnameOverride: coredns
replicaCount: 2
k8sAppLabelOverride: kube-dns
service:
name: kube-dns
clusterIP: {{ k3s_server['service-cidr'] | ansible.utils.nthhost(10) }}
serviceAccount:
create: true
deployment:
annotations:
reloader.stakater.com/auto: "true"
servers:
- zones:
- zone: .
scheme: dns://
use_tcp: true
port: 53
plugins:
- name: log
- name: errors
- name: health
configBlock: |-
lameduck 5s
- name: ready
- name: kubernetes
parameters: cluster.local in-addr.arpa ip6.arpa
configBlock: |-
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
- name: prometheus
parameters: 0.0.0.0:9153
- name: forward
parameters: . /etc/resolv.conf
- name: cache
parameters: 30
- name: loop
- name: reload
- name: loadbalance
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: Exists
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/instance: coredns

8
ansible/requirements.txt Normal file

@ -0,0 +1,8 @@
ansible==9.1.0
ansible-lint==6.22.1
# https://github.com/pyca/bcrypt/issues/684
bcrypt==4.1.2
jmespath==1.0.1
netaddr==0.10.0
openshift==0.13.2
passlib==1.7.4

18
ansible/requirements.yaml Normal file

@ -0,0 +1,18 @@
---
collections:
- name: ansible.posix
version: 1.5.4
- name: ansible.utils
version: 3.0.0
- name: community.general
version: 8.1.0
- name: community.sops
version: 1.6.7
- name: kubernetes.core
version: 3.0.0
- name: onepassword.connect
version: 2.2.4
roles:
- name: xanmanning.k3s
src: https://github.com/PyratLabs/ansible-role-k3s
version: v3.4.3

@ -0,0 +1,47 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: cert-manager
namespace: cert-manager
spec:
interval: 30m
chart:
spec:
chart: cert-manager
version: v1.13.3
sourceRef:
kind: HelmRepository
name: jetstack
namespace: flux-system
interval: 30m
install:
crds: CreateReplace
upgrade:
crds: CreateReplace
values:
installCRDs: true
webhook:
enabled: true
extraArgs:
- --dns01-recursive-nameservers=1.1.1.1:53,9.9.9.9:53
- --dns01-recursive-nameservers-only
- --enable-certificate-owner-ref
replicaCount: 1
podDnsPolicy: "None"
podDnsConfig:
nameservers:
- "1.1.1.1"
- "9.9.9.9"
prometheus:
enabled: true
servicemonitor:
enabled: true
prometheusInstance: monitoring

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: cert-manager
resources:
- ./helmrelease.yaml

@ -0,0 +1,19 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: cloudflare-api-token
namespace: cert-manager
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: cloudflare-api-token
creationPolicy: Owner
data:
- secretKey: api-token
remoteRef:
key: Cloudflare
property: hsn_api_token

@ -0,0 +1,22 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/cert-manager.io/clusterissuer_v1.json
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-cloudflare-production
spec:
acme:
email: "joe@veri.dev"
preferredChain: ""
privateKeySecretRef:
name: letsencrypt-cloudflare-production
server: https://acme-v02.api.letsencrypt.org/directory
solvers:
- dns01:
cloudflare:
apiTokenSecretRef:
name: cloudflare-api-token
key: api-token
selector:
dnsZones:
- hsn.dev

@ -0,0 +1,22 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/cert-manager.io/clusterissuer_v1.json
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-cloudflare-staging
spec:
acme:
email: "joe@veri.dev"
preferredChain: ""
privateKeySecretRef:
name: letsencrypt-cloudflare-staging
server: https://acme-staging-v02.api.letsencrypt.org/directory
solvers:
- dns01:
cloudflare:
apiTokenSecretRef:
name: cloudflare-api-token
key: api-token
selector:
dnsZones:
- hsn.dev

@ -0,0 +1,22 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: flow-schema-reader
rules:
- apiGroups: ["flowcontrol.apiserver.k8s.io"]
resources: ["flowschemas", "prioritylevelconfigurations"]
verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: grant-flow-schema-permission
subjects:
- kind: ServiceAccount
name: dnsimple-issuer-cert-manager-webhook-dnsimple
namespace: cert-manager
roleRef:
kind: ClusterRole
name: flow-schema-reader
apiGroup: rbac.authorization.k8s.io

View file

@ -0,0 +1,23 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: dnsimple-api-token
namespace: cert-manager
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: dnsimple-api-token
creationPolicy: Owner
data:
- secretKey: api-token
remoteRef:
key: DNSimple
property: cert-manager
- secretKey: letsencrypt-email
remoteRef:
key: DNSimple
property: letsencrypt-email

View file

@ -0,0 +1,36 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: dnsimple-issuer
namespace: cert-manager
spec:
interval: 30m
chart:
spec:
chart: cert-manager-webhook-dnsimple
version: 0.0.11
interval: 30m
sourceRef:
kind: HelmRepository
name: jahanson
namespace: flux-system
values:
controller:
annotations:
reloader.stakater.com/auto: "true"
dnsimple:
token:
valueFrom:
secretKeyRef:
name: dnsimple-api-token
key: api-token
clusterIssuer:
email:
valueFrom:
secretKeyRef:
name: dnsimple-api-token
key: letsencrypt-email
containerport: 8443

View file

@ -0,0 +1,22 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/cert-manager.io/clusterissuer_v1.json
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-dnsimple-production
spec:
acme:
email: "joe@veri.dev"
preferredChain: ""
privateKeySecretRef:
name: letsencrypt-dnsimple-production
server: https://acme-v02.api.letsencrypt.org/directory
solvers:
- dns01:
webhook:
config:
tokenSecretRef:
key: api-token
name: dnsimple-api-token
solverName: dnsimple
groupName: acme.jahanson.com

View file

@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/cert-manager.io/clusterissuer_v1.json
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
preferredChain: ""
privateKeySecretRef:
name: letsencrypt-staging
server: https://acme-staging-v02.api.letsencrypt.org/directory
solvers:
- dns01:
webhook:
config:
tokenSecretRef:
key: api-token
name: dnsimple-api-token
solverName: dnsimple
groupName: acme.jahanson.com
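A workload requests a certificate from either issuer through a cert-manager Certificate; a rough sketch, with placeholder name and dnsNames:

apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: example-hsn-dev
  namespace: default
spec:
  secretName: example-hsn-dev-tls
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt-dnsimple-production
  dnsNames:
    - example.hsn.dev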

View file

@ -0,0 +1,14 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: cert-manager
resources:
- ./dnsimple/externalsecret.yaml
- ./dnsimple/issuer-letsencrypt-prod.yaml
- ./dnsimple/issuer-letsencrypt-staging.yaml
- ./dnsimple/dnsimple-issuer-rbac.yaml
- ./dnsimple/helmrelease.yaml
- ./cloudflare/externalsecret.yaml
- ./cloudflare/issuer-letsencrypt-prod.yaml
- ./cloudflare/issuer-letsencrypt-staging.yaml

View file

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-cert-manager
namespace: flux-system
spec:
interval: 10m
path: "./kubernetes/apps/cert-manager/cert-manager/app"
prune: true
sourceRef:
kind: GitRepository
name: valinor
wait: true
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-cert-manager-issuers
namespace: flux-system
spec:
interval: 10m
path: "./kubernetes/apps/cert-manager/cert-manager/issuers"
prune: true
sourceRef:
kind: GitRepository
name: valinor
wait: false
dependsOn:
- name: cluster-apps-cert-manager

View file

@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Pre Flux-Kustomizations
- ./namespace.yaml
# Flux-Kustomizations
- ./cert-manager/ks.yaml

View file

@ -0,0 +1,7 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: cert-manager
labels:
kustomize.toolkit.fluxcd.io/prune: disabled

View file

@ -0,0 +1,117 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: jellyfin
namespace: default
spec:
interval: 30m
chart:
spec:
chart: app-template
version: 2.4.0
sourceRef:
kind: HelmRepository
name: bjw-s
namespace: flux-system
maxHistory: 2
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
dependsOn:
- name: intel-device-plugins-gpu
namespace: system
values:
controllers:
main:
type: statefulset
annotations:
reloader.stakater.com/auto: "true"
containers:
main:
image:
repository: jellyfin/jellyfin
tag: 10.8.13
env:
DOTNET_SYSTEM_IO_DISABLEFILELOCKING: "true"
JELLYFIN_FFmpeg__probesize: 50000000
JELLYFIN_FFmpeg__analyzeduration: 50000000
JELLYFIN_PublishedServerUrl: jelly.hsn.dev
TZ: America/Chicago
probes:
liveness: &probes
enabled: true
custom: true
spec:
httpGet:
path: /health
port: &port 8096
initialDelaySeconds: 0
periodSeconds: 10
timeoutSeconds: 1
failureThreshold: 3
readiness: *probes
startup:
enabled: false
resources:
requests:
gpu.intel.com/i915: 1
cpu: 100m
memory: 512Mi
limits:
gpu.intel.com/i915: 1
memory: 4Gi
pod:
enableServiceLinks: false
nodeSelector:
intel.feature.node.kubernetes.io/gpu: "true"
securityContext:
runAsUser: 568
runAsGroup: 568
fsGroup: 568
fsGroupChangePolicy: OnRootMismatch
supplementalGroups: [44, 105, 10000]
statefulset:
volumeClaimTemplates:
- name: config
accessMode: ReadWriteOnce
size: 50Gi
storageClass: ceph-block
globalMounts:
- path: /config
service:
main:
ports:
http:
port: *port
ingress:
main:
enabled: true
className: hsn-nginx
annotations:
hosts:
- host: &host "jelly.hsn.dev"
paths:
- path: /
service:
name: main
port: http
tls:
- hosts:
- *host
persistence:
transcode:
type: emptyDir
globalMounts:
- path: /transcode
media:
existingClaim: media
globalMounts:
- path: /media
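The &probes/*probes and &port/*port anchors keep the readiness probe and the Service port in lockstep with the liveness definition; expanded, the service block above is equivalent to:

service:
  main:
    ports:
      http:
        port: 8096

The same pattern ties the ingress TLS hosts to the &host anchor.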

View file

@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: default
resources:
- ./helmrelease.yaml
- ./pvc.yaml

View file

@ -0,0 +1,15 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: media
namespace: default
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 20Gi
storageClassName: ceph-block

View file

@ -0,0 +1,19 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-jellyfin
namespace: flux-system
spec:
dependsOn:
- name: cluster-apps-external-secrets-stores
path: ./kubernetes/apps/default/jellyfin/app
prune: true
sourceRef:
kind: GitRepository
name: valinor
wait: false
interval: 30m
retryInterval: 1m
timeout: 5m

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Flux-Kustomizations
- ./jellyfin/ks.yaml

View file

@ -0,0 +1,7 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: default
labels:
kustomize.toolkit.fluxcd.io/prune: disabled

View file

@ -0,0 +1,20 @@
apiVersion: v1
kind: Pod
metadata:
name: rocky-nessa
namespace: default
spec:
nodeName: nessa
containers:
- name: rocky
image: rockylinux:9
securityContext:
privileged: true
command: ["/bin/bash", "-c", "while true; do sleep 10; done"]
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 4000m
memory: 4000Mi

View file

@ -0,0 +1,20 @@
apiVersion: v1
kind: Pod
metadata:
name: rocky-nienna
namespace: default
spec:
nodeName: nienna
containers:
- name: rocky
image: rockylinux:9
securityContext:
privileged: true
command: ["/bin/bash", "-c", "while true; do sleep 10; done"]
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 4000m
memory: 4000Mi

View file

@ -0,0 +1,19 @@
apiVersion: v1
kind: Pod
metadata:
name: ubuntu
namespace: default
spec:
containers:
- name: ubuntu
image: ubuntu:latest
securityContext:
privileged: true
command: ["/bin/bash", "-c", "while true; do sleep 10; done"]
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 4000m
memory: 4000Mi

View file

@ -0,0 +1,34 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-flux-webhooks
namespace: flux-system
labels:
substitution.flux.home.arpa/enabled: "true"
spec:
interval: 10m
path: ./kubernetes/apps/flux-system/add-ons/webhooks
prune: true
sourceRef:
kind: GitRepository
name: valinor
wait: true
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-flux-monitoring
namespace: flux-system
labels:
substitution.flux.home.arpa/enabled: "true"
spec:
interval: 10m
path: ./kubernetes/apps/flux-system/add-ons/monitoring
prune: true
sourceRef:
kind: GitRepository
name: valinor
wait: true

View file

@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: flux-system
resources:
- ./podmonitor.yaml
- ./prometheusrule.yaml

View file

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/podmonitor_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: flux-system
namespace: flux-system
labels:
app.kubernetes.io/part-of: flux
app.kubernetes.io/component: monitoring
spec:
namespaceSelector:
matchNames:
- flux-system
selector:
matchExpressions:
- key: app
operator: In
values:
- helm-controller
- source-controller
- kustomize-controller
- notification-controller
- image-automation-controller
- image-reflector-controller
podMetricsEndpoints:
- port: http-prom
relabelings:
# https://github.com/prometheus-operator/prometheus-operator/issues/4816
- sourceLabels: [__meta_kubernetes_pod_phase]
action: keep
regex: Running

View file

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/monitoring.coreos.com/prometheusrule_v1.json
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: flux-rules
namespace: flux-system
spec:
groups:
- name: flux.rules
rules:
- alert: FluxComponentAbsent
annotations:
summary: Flux component has disappeared from Prometheus target discovery.
expr: |
absent(up{job=~".*flux-system.*"} == 1)
for: 15m
labels:
severity: critical
- alert: FluxReconciliationFailure
annotations:
summary: >-
{{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation
has been failing for more than 15 minutes.
expr: |
max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind)
+
on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"})
by (namespace, name, kind)) * 2 == 1
for: 15m
labels:
severity: critical

View file

@ -0,0 +1,19 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/external-secrets.io/externalsecret_v1beta1.json
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: git-webhook-token
namespace: flux-system
spec:
secretStoreRef:
kind: ClusterSecretStore
name: onepassword-connect
target:
name: git-webhook-token
creationPolicy: Owner
data:
- secretKey: token
remoteRef:
key: flux
property: git_webhook_token

View file

@ -0,0 +1,24 @@
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: webhook-receiver
namespace: flux-system
annotations:
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
spec:
ingressClassName: "hsn-nginx"
rules:
- host: &host "flux-receiver-valinor.hsn.dev"
http:
paths:
- path: /hook/
pathType: Prefix
backend:
service:
name: webhook-receiver
port:
number: 80
tls:
- hosts:
- *host

View file

@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./externalsecret.yaml
- ./ingress.yaml
- ./receiver.yaml

View file

@ -0,0 +1,29 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/receiver-notification-v1.json
apiVersion: notification.toolkit.fluxcd.io/v1
kind: Receiver
metadata:
name: git-receiver
namespace: flux-system
spec:
type: github
events:
- "ping"
- "push"
secretRef:
name: git-webhook-token
resources:
- apiVersion: source.toolkit.fluxcd.io/v1
kind: GitRepository
name: "valinor"
namespace: "flux-system"
- apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
name: "cluster"
namespace: "flux-system"
- apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
name: "cluster-apps"
namespace: "flux-system"
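The Ingress above only needs the /hook/ prefix; Flux generates the exact receiver path and publishes it on the Receiver status, along the lines of:

status:
  webhookPath: /hook/<sha256 digest>

That full path, appended to flux-receiver-valinor.hsn.dev, is what the GitHub webhook should point at.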

View file

@ -0,0 +1,6 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./git

View file

@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Pre Flux-Kustomizations
- ./namespace.yaml
# Flux-Kustomizations
- ./add-ons/ks.yaml

View file

@ -0,0 +1,7 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: flux-system
labels:
kustomize.toolkit.fluxcd.io/prune: disabled

View file

@ -0,0 +1,75 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/helmrelease-helm-v2beta2.json
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
name: cilium
namespace: kube-system
spec:
interval: 30m
chart:
spec:
chart: cilium
version: 1.14.5
sourceRef:
kind: HelmRepository
name: cilium
namespace: flux-system
maxHistory: 2
install:
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
values:
cluster:
name: valinor
id: 1
hubble:
relay:
enabled: true
ui:
enabled: true
metrics:
# enabled: "{dns,drop,tcp,flow,port-distribution,icmp,httpV2:exemplars=true;labelsContext=source_ip,source_namespace,source_workload,destination_ip,destination_namespace,destination_workload,traffic_direction}"
enableOpenMetrics: true
prometheus:
enabled: true
operator:
prometheus:
enabled: true
ipam:
mode: kubernetes
policyEnforcementMode: always # enforce network policies
policyAuditMode: true # do not block traffic
hostFirewall:
enabled: true # enable host policies
extraConfig:
allow-localhost: policy # enable policies for localhost
kubeProxyReplacement: true
securityContext:
capabilities:
ciliumAgent:
- CHOWN
- KILL
- NET_ADMIN
- NET_RAW
- IPC_LOCK
- SYS_ADMIN
- SYS_RESOURCE
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
cleanCiliumState:
- NET_ADMIN
- SYS_ADMIN
- SYS_RESOURCE
k8sServiceHost: ${K8S_SERVICE_ENDPOINT}
k8sServicePort: 6443
rollOutCiliumPods: true
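With policyEnforcementMode: always combined with policyAuditMode: true, the clusterwide policies that follow are evaluated and violations are logged, but no traffic is dropped yet; once the policies are validated, enforcement presumably amounts to flipping the audit flag in these values:

policyEnforcementMode: always
policyAuditMode: false # start dropping traffic not allowed by a policy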

View file

@ -0,0 +1,9 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-ns-ingress
spec:
podSelector: {}
ingress:
- from:
- podSelector: {}

View file

@ -0,0 +1,23 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: allow-ssh
spec:
description: ""
nodeSelector:
matchLabels:
# node-access: ssh
node-role.kubernetes.io/control-plane: "true"
ingress:
- fromEntities:
- cluster
- toPorts:
- ports:
- port: "22"
protocol: TCP
- icmps:
- fields:
- type: 8
family: IPv4

View file

@ -0,0 +1,27 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: api-server
spec:
nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# load balancer -> api server
- fromCIDR:
- 167.235.217.82/32
toPorts:
- ports:
- port: '6443'
protocol: TCP
egress:
# api server -> kubelet
- toEntities:
- remote-node
toPorts:
- ports:
- port: '10250'
protocol: TCP

View file

@ -0,0 +1,41 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: cilium-health
specs:
- endpointSelector:
# apply to health endpoints
matchLabels:
reserved:health: ''
ingress:
# cilium agent -> cilium agent
- fromEntities:
- host
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP
- nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# cilium agent -> cilium agent
- fromEntities:
- health
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP
egress:
# cilium agent -> cilium agent
- toEntities:
- health
- remote-node
toPorts:
- ports:
- port: '4240'
protocol: TCP

View file

@ -0,0 +1,26 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: cilium-vxlan
spec:
nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# node -> vxlan
- fromEntities:
- remote-node
toPorts:
- ports:
- port: '8472'
protocol: UDP
egress:
# node -> vxlan
- toEntities:
- remote-node
toPorts:
- ports:
- port: '8472'
protocol: UDP

View file

@ -0,0 +1,65 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: core-dns
namespace: kube-system
specs:
- nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# core dns -> api server
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '6443'
protocol: TCP
- nodeSelector:
# apply to all nodes
matchLabels: {}
egress:
# kubelet -> core dns probes
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
toPorts:
- ports:
- port: '8080'
protocol: TCP
- port: '8181'
protocol: TCP
- endpointSelector:
# apply to core dns pods
matchLabels:
io.cilium.k8s.policy.serviceaccount: coredns
ingress:
# kubelet -> core dns probes
- fromEntities:
- host
toPorts:
- ports:
- port: '8080'
protocol: TCP
- port: '8181'
protocol: TCP
egress:
# core dns -> api server
- toEntities:
- kube-apiserver
toPorts:
- ports:
- port: '6443'
protocol: TCP
# core dns -> upstream DNS
- toCIDR:
- 185.12.64.1/32
- 185.12.64.2/32
toPorts:
- ports:
- port: '53'
protocol: UDP

View file

@ -0,0 +1,27 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: etcd
spec:
nodeSelector:
# apply to master nodes
matchLabels:
node-role.kubernetes.io/control-plane: 'true'
ingress:
# etcd peer -> etcd peer
- fromEntities:
- remote-node
toPorts:
- ports:
- port: '2380'
protocol: TCP
egress:
# etcd peer -> etcd peer
- toEntities:
- remote-node
toPorts:
- ports:
- port: '2380'
protocol: TCP

View file

@ -0,0 +1,15 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
---
apiVersion: "cilium.io/v2"
kind: CiliumClusterwideNetworkPolicy
metadata:
name: allow-specific-traffic
spec:
endpointSelector: {}
ingress:
- fromEntities:
- host
toPorts:
- ports:
- port: '6443'
protocol: TCP

View file

@ -0,0 +1,50 @@
# yaml-language-server: $schema=https://ks.hsn.dev/cilium.io/ciliumnetworkpolicy_v2.json
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: hubble-relay
namespace: kube-system
specs:
- nodeSelector:
# apply to all nodes
matchLabels: {}
ingress:
# hubble relay -> hubble agent
- fromEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4244'
protocol: TCP
egress:
# kubelet -> hubble relay probes
- toEndpoints:
- matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
toPorts:
- ports:
- port: '4245'
protocol: TCP
- endpointSelector:
# apply to hubble relay pods
matchLabels:
io.cilium.k8s.policy.serviceaccount: hubble-relay
ingress:
# kubelet -> hubble relay probes
- fromEntities:
- host
toPorts:
- ports:
- port: '4245'
protocol: TCP
egress:
# hubble relay -> hubble agent
- toEntities:
- host
- remote-node
toPorts:
- ports:
- port: '4244'
protocol: TCP

Some files were not shown because too many files have changed in this diff.