From c9b6c37ce9476be434fec572f1b18d38c267480a Mon Sep 17 00:00:00 2001
From: Joseph Hanson
Date: Mon, 8 Jul 2024 10:53:12 -0500
Subject: [PATCH] re-organize Taskfiles

Archive the rook tasks, move the VolSync taskfile to .taskfiles/volsync
with its Job templates under resources/, rename PreCommit to precommit,
move the sync-secrets and mount-volume tasks into the k8s taskfile, and
drop the Brew-based init task in favor of notes in shell.nix.
---
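Notes:

    Example invocations for the reorganized volsync tasks. The app name
    "plex" is illustrative (taken from the taskfile comments), "homelab"
    matches the default context in the helper scripts, and ns defaults
    to "default":

        task volsync:list cluster=homelab app=plex
        task volsync:unlock cluster=homelab app=plex
        task volsync:snapshot cluster=homelab app=plex
        task volsync:restore cluster=homelab app=plex previous=2
        task volsync:state-suspend cluster=homelab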
"{{.ROOT_DIR}}/.taskfiles/VolSync/wait-for-job.sh" - listJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/ListJob.tmpl.yaml" - unlockJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/UnlockJob.tmpl.yaml" - ts: '{{now | date "150405"}}' - -tasks: - - list: - desc: List all snapshots taken by restic for a given ReplicationSource (ex. task volsync:list rsrc=plex [namespace=default]) - silent: true - cmds: - - envsubst < <(cat {{.listJobTemplate}}) | kubectl apply -f - - - bash {{.waitForJobScript}} list-{{.rsrc}}-{{.ts}} {{.namespace}} - - kubectl -n {{.namespace}} wait job/list-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m - - kubectl -n {{.namespace}} logs job/list-{{.rsrc}}-{{.ts}} --container list - - kubectl -n {{.namespace}} delete job list-{{.rsrc}}-{{.ts}} - vars: - rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}' - namespace: '{{.namespace | default "default"}}' - env: *task-vars - preconditions: - # - sh: test -f {{.waitForJobScript}} - - sh: test -f {{.listJobTemplate}} - - unlock: - desc: Unlocks restic repository for a given ReplicationSource (ex. task volsync:unlock rsrc=plex [namespace=default]) - silent: true - cmds: - - envsubst < <(cat {{.unlockJobTemplate}}) | kubectl apply -f - - # - bash {{.waitForJobScript}} unlock-{{.rsrc}}-{{.ts}} {{.namespace}} - - kubectl -n {{.namespace}} wait job/unlock-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m - - kubectl -n {{.namespace}} logs job/unlock-{{.rsrc}}-{{.ts}} --container unlock - - kubectl -n {{.namespace}} delete job unlock-{{.rsrc}}-{{.ts}} - vars: - rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}' - namespace: '{{.namespace | default "default"}}' - env: *task-vars - preconditions: - # - sh: test -f {{.waitForJobScript}} - - sh: test -f {{.unlockJobTemplate}} - - # To run backup jobs in parallel for all replicationsources: - # - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot rsrc=$0 namespace=$1' - # - snapshot: - desc: Trigger a Restic ReplicationSource snapshot (ex. task volsync:snapshot rsrc=plex [namespace=default]) - cmds: - - kubectl -n {{.namespace}} patch replicationsources {{.rsrc}} --type merge -p '{"spec":{"trigger":{"manual":"{{.ts}}"}}}' - - bash {{.waitForJobScript}} volsync-src-{{.rsrc}} {{.namespace}} - - kubectl -n {{.namespace}} wait job/volsync-src-{{.rsrc}} --for condition=complete --timeout=120m - # TODO: Find a way to output logs - # Error from server (NotFound): jobs.batch "volsync-src-zzztest" not found - # - kubectl -n {{.namespace}} logs job/volsync-src-{{.rsrc}} - vars: - rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}' - namespace: '{{.namespace | default "default"}}' - env: *task-vars - preconditions: - # - sh: test -f {{.waitForJobScript}} - - sh: kubectl -n {{.namespace}} get replicationsources {{.rsrc}} - msg: "ReplicationSource '{{.rsrc}}' not found in namespace '{{.namespace}}'" - - # To run restore jobs in parallel for all replicationdestinations: - # - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=2 -l bash -c 'task volsync:restore rsrc=$0 namespace=$1' - # - restore: - desc: Trigger a Restic ReplicationSource restore (ex. 
-  restore:
-    desc: Trigger a Restic ReplicationSource restore (ex. task volsync:restore rsrc=plex [namespace=default])
-    cmds:
-      - task: restore-suspend-app
-        vars: *task-vars
-      - task: restore-wipe-job
-        vars: *task-vars
-      - task: restore-volsync-job
-        vars: *task-vars
-      - task: restore-resume-app
-        vars: *task-vars
-    vars:
-      rsrc: '{{ or .rsrc (fail "Variable `rsrc` is required") }}'
-      namespace: '{{.namespace | default "default"}}'
-      # 1) Query to find the Flux Kustomization associated with the ReplicationSource (rsrc)
-      kustomization:
-        sh: |
-          kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
-            -o jsonpath="{.metadata.labels.kustomize\.toolkit\.fluxcd\.io/name}"
-      # 2) Query to find the Claim associated with the ReplicationSource (rsrc)
-      claim:
-        sh: |
-          kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
-            -o jsonpath="{.spec.sourcePVC}"
-      # 3) Query to find the controller associated with the PersistentVolumeClaim (claim)
-      controller:
-        sh: |
-          app=$(kubectl -n {{.namespace}} get persistentvolumeclaim {{.claim}} -o jsonpath="{.metadata.labels.app\.kubernetes\.io/name}")
-          if kubectl -n {{ .namespace }} get deployment.apps/$app >/dev/null 2>&1 ; then
-            echo "deployment.apps/$app"
-          else
-            echo "statefulset.apps/$app"
-          fi
-      previous: "{{.previous | default 2}}"
-    env: *task-vars
-    preconditions:
-      - sh: test -f {{.wipeJobTemplate}}
-      - sh: test -f {{.destinationTemplate}}
-      # - sh: test -f {{.waitForJobScript}}
-
-  # Suspend the Flux ks and hr
-  restore-suspend-app:
-    internal: true
-    cmds:
-      - flux -n flux-system suspend kustomization {{.kustomization}}
-      - flux -n {{.namespace}} suspend helmrelease {{.rsrc}}
-      - kubectl -n {{.namespace}} scale {{.controller}} --replicas 0
-      - kubectl -n {{.namespace}} wait pod --for delete --selector="app.kubernetes.io/name={{.rsrc}}" --timeout=2m
-    env: *task-vars
-
-  # Wipe the PVC of all data
-  restore-wipe-job:
-    internal: true
-    cmds:
-      - envsubst < <(cat {{.wipeJobTemplate}}) | kubectl apply -f -
-      - bash {{.waitForJobScript}} wipe-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
-      - kubectl -n {{.namespace}} wait job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
-      - kubectl -n {{.namespace}} logs job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --container wipe
-      - kubectl -n {{.namespace}} delete job wipe-{{.rsrc}}-{{.claim}}-{{.ts}}
-    env: *task-vars
-
-  # Create VolSync replicationdestination CR to restore data
-  restore-volsync-job:
-    internal: true
-    cmds:
-      - envsubst < <(cat {{.destinationTemplate}}) | kubectl apply -f -
-      - bash {{.waitForJobScript}} volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
-      - kubectl -n {{.namespace}} wait job/volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
-      - kubectl -n {{.namespace}} delete replicationdestination {{.rsrc}}-{{.claim}}-{{.ts}}
-    env: *task-vars
-
-  # Resume Flux ks and hr
-  restore-resume-app:
-    internal: true
-    cmds:
-      - flux -n {{.namespace}} resume helmrelease {{.rsrc}}
-      - flux -n flux-system resume kustomization {{.kustomization}}
-    env: *task-vars
diff --git a/.taskfiles/VolSync/UnlockJob.tmpl.yaml b/.taskfiles/VolSync/UnlockJob.tmpl.yaml
deleted file mode 100644
index f38d8e9..0000000
--- a/.taskfiles/VolSync/UnlockJob.tmpl.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
----
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: "unlock-${rsrc}-${ts}"
-  namespace: "${namespace}"
-spec:
-  ttlSecondsAfterFinished: 3600
-  template:
-    spec:
-      automountServiceAccountToken: false
-      restartPolicy: OnFailure
-      containers:
-        - name: unlock
-          image: docker.io/restic/restic:0.16.0
-          args: ["unlock", "--remove-all"]
"--remove-all"] - envFrom: - - secretRef: - name: "${rsrc}-volsync-r2-secret" ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: "unlock-${rsrc}-r2-${ts}" - namespace: "${namespace}" -spec: - ttlSecondsAfterFinished: 3600 - template: - spec: - automountServiceAccountToken: false - restartPolicy: OnFailure - containers: - - name: unlock - image: docker.io/restic/restic:0.16.0 - args: ["unlock", "--remove-all"] - envFrom: - - secretRef: - name: "${rsrc}-volsync-secret" diff --git a/.taskfiles/VolSync/wait-for-job.sh b/.taskfiles/VolSync/wait-for-job.sh deleted file mode 100644 index 32feadd..0000000 --- a/.taskfiles/VolSync/wait-for-job.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -JOB_NAME=$1 -NAMESPACE="${2:-default}" - -[[ -z "${JOB_NAME}" ]] && echo "Job name not specified" && exit 1 - -while true; do - STATUS="$(kubectl -n "${NAMESPACE}" get pod -l job-name="${JOB_NAME}" -o jsonpath='{.items[*].status.phase}')" - if [ "${STATUS}" == "Pending" ]; then - break - fi - sleep 1 -done diff --git a/.taskfiles/k8s/Taskfile.yaml b/.taskfiles/k8s/Taskfile.yaml index 541afc1..4984784 100644 --- a/.taskfiles/k8s/Taskfile.yaml +++ b/.taskfiles/k8s/Taskfile.yaml @@ -9,4 +9,58 @@ tasks: hubble-ui: desc: port-forward hubble to 8888 cmds: - - kubectl port-forward -n kube-system svc/hubble-ui 8888:80 \ No newline at end of file + - kubectl port-forward -n kube-system svc/hubble-ui 8888:80 + sync-secrets: + desc: Sync ExternalSecret resources + vars: + secret: '{{ .secret | default ""}}' + namespace: '{{.namespace | default "default"}}' + cmd: | + {{if eq .secret ""}} + kubectl get externalsecret.external-secrets.io --all-namespaces --no-headers -A | awk '{print $1, $2}' \ + | xargs --max-procs=4 -l bash -c 'kubectl -n $0 annotate externalsecret.external-secrets.io $1 force-sync=$(date +%s) --overwrite' + {{else}} + kubectl -n {{.namespace}} annotate externalsecret.external-secrets.io {{.secret}} force-sync=$(date +%s) --overwrite + {{end}} + preconditions: + - kubectl -n {{.namespace}} get externalsecret {{.secret}} + mount-volume: + desc: Mount a PersistentVolumeClaim to a temporary pod + interactive: true + vars: + claim: '{{ or .claim (fail "PersistentVolumeClaim `claim` is required") }}' + namespace: '{{.namespace | default "default"}}' + cmd: | + kubectl run -n {{.namespace}} debug-{{.claim}} -i --tty --rm --image=null --privileged --overrides=' + { + "apiVersion": "v1", + "spec": { + "containers": [ + { + "name": "debug", + "image": "docker.io/library/alpine:latest", + "command": ["/bin/ash"], + "stdin": true, + "stdinOnce": true, + "tty": true, + "volumeMounts": [ + { + "name": "config", + "mountPath": "/config" + } + ] + } + ], + "volumes": [ + { + "name": "config", + "persistentVolumeClaim": { + "claimName": "{{.claim}}" + } + } + ], + "restartPolicy": "Never" + } + }' + preconditions: + - kubectl -n {{.namespace}} get pvc {{.claim}} diff --git a/.taskfiles/PreCommit/Tasks.yaml b/.taskfiles/precommit/Taskfile.yaml similarity index 100% rename from .taskfiles/PreCommit/Tasks.yaml rename to .taskfiles/precommit/Taskfile.yaml diff --git a/.taskfiles/volsync/Taskfile.yaml b/.taskfiles/volsync/Taskfile.yaml new file mode 100644 index 0000000..7d5d979 --- /dev/null +++ b/.taskfiles/volsync/Taskfile.yaml @@ -0,0 +1,225 @@ +--- +# yaml-language-server: $schema=https://taskfile.dev/schema.json +version: "3" + +# This taskfile is used to manage certain VolSync tasks for a given application, limitations are described below. +# 1. 
+#   2. ReplicationSource and ReplicationDestination are a Restic repository
+#   3. Applications are deployed as either a Kubernetes Deployment or StatefulSet
+#   4. Each application only has one PVC that is being replicated
+
+x-env-vars: &env-vars
+  app: "{{.app}}"
+  claim: "{{.claim}}"
+  controller: "{{.controller}}"
+  job: "{{.job}}"
+  ns: "{{.ns}}"
+  pgid: "{{.pgid}}"
+  previous: "{{.previous}}"
+  puid: "{{.puid}}"
+
+vars:
+  VOLSYNC_RESOURCES_DIR: "{{.ROOT_DIR}}/.taskfiles/volsync/resources"
+
+tasks:
+
+  state-*:
+    desc: Suspend or Resume Volsync
+    summary: |
+      cluster: Cluster to run command against (required)
+      state: resume or suspend (required)
+    cmds:
+      - flux --context {{.cluster}} {{.state}} kustomization volsync
+      - flux --context {{.cluster}} -n {{.ns}} {{.state}} helmrelease volsync
+      - kubectl --context {{.cluster}} -n {{.ns}} scale deployment volsync --replicas {{if eq "suspend" .state}}0{{else}}1{{end}}
+    env: *env-vars
+    vars:
+      ns: '{{.ns | default "volsync-system"}}'
+      state: '{{index .MATCH 0}}'
+    requires:
+      vars: ["cluster"]
+
+  list:
+    desc: List snapshots for an application
+    summary: |
+      cluster: Cluster to run command against (required)
+      ns: Namespace the PVC is in (default: default)
+      app: Application to list snapshots for (required)
+    cmds:
+      - $GOPATH/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/list.tmpl.yaml) | kubectl --context {{.cluster}} apply -f -
+      - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
+      - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m
+      - kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container main
+      - kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
+    env: *env-vars
+    requires:
+      vars: ["cluster", "app"]
+    vars:
+      ns: '{{.ns | default "default"}}'
+      job: volsync-list-{{.app}}
+    preconditions:
+      - test -f $GOPATH/bin/envsubst
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/list.tmpl.yaml
+    silent: true
+
+  unlock:
+    desc: Unlock a Restic repository for an application
+    summary: |
+      cluster: Cluster to run command against (required)
+      ns: Namespace the PVC is in (default: default)
+      app: Application to unlock (required)
+    cmds:
+      - $GOPATH/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/unlock.tmpl.yaml) | kubectl --context {{.cluster}} apply -f -
+      - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
+      - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m
+      - kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container minio
+      - kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container r2
+      - kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
+    env: *env-vars
+    requires:
+      vars: ["cluster", "app"]
+    vars:
+      ns: '{{.ns | default "default"}}'
+      job: volsync-unlock-{{.app}}
+    preconditions:
+      - test -f $GOPATH/bin/envsubst
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/unlock.tmpl.yaml
+    silent: true
+
+  # To run backup jobs in parallel for all replicationsources:
+  #   - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot app=$0 ns=$1'
+  snapshot:
+    desc: Snapshot a PVC for an application
+    summary: |
+      cluster: Cluster to run command against (required)
+      ns: Namespace the PVC is in (default: default)
+      app: Application to snapshot (required)
+    cmds:
+      - kubectl --context {{.cluster}} -n {{.ns}} patch replicationsources {{.app}} --type merge -p '{"spec":{"trigger":{"manual":"{{.now}}"}}}'
+      - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
+      - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
+    env: *env-vars
+    requires:
+      vars: ["cluster", "app"]
+    vars:
+      now: '{{now | date "150405"}}'
+      ns: '{{.ns | default "default"}}'
+      job: volsync-src-{{.app}}
+      controller:
+        sh: true && {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh {{.app}} {{.ns}} {{.cluster}}
+    preconditions:
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
+      - kubectl --context {{.cluster}} -n {{.ns}} get replicationsources {{.app}}
+
+  # To run restore jobs in parallel for all replicationdestinations:
+  #   - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:restore app=$0 ns=$1'
+  restore:
+    desc: Restore a PVC for an application
+    summary: |
+      cluster: Cluster to run command against (required)
+      ns: Namespace the PVC is in (default: default)
+      app: Application to restore (required)
+      previous: Number of snapshots back from the latest to restore from (default: 2)
+    cmds:
+      - { task: .suspend, vars: *env-vars }
+      - { task: .wipe, vars: *env-vars }
+      - { task: .restore, vars: *env-vars }
+      - { task: .resume, vars: *env-vars }
+    env: *env-vars
+    requires:
+      vars: ["cluster", "app"]
+    vars:
+      ns: '{{.ns | default "default"}}'
+      previous: '{{.previous | default 2}}'
+      controller:
+        sh: "{{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh {{.app}} {{.ns}} {{.cluster}}"
+      claim:
+        sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.sourcePVC}"
+      puid:
+        sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsUser}"
+      pgid:
+        sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsGroup}"
+    preconditions:
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh
+
+  cleanup:
+    desc: Delete volume populator PVCs in all namespaces
+    summary: |
+      cluster: Cluster to run command against (required)
+    cmds:
+      - for: { var: dest }
+        cmd: |
+          {{- $items := (split "/" .ITEM) }}
+          kubectl --context {{.cluster}} delete pvc -n {{ $items._0 }} {{ $items._1 }}
+      - for: { var: cache }
+        cmd: |
+          {{- $items := (split "/" .ITEM) }}
+          kubectl --context {{.cluster}} delete pvc -n {{ $items._0 }} {{ $items._1 }}
+      - for: { var: snaps }
+        cmd: |
+          {{- $items := (split "/" .ITEM) }}
+          kubectl --context {{.cluster}} delete volumesnapshot -n {{ $items._0 }} {{ $items._1 }}
+    env: *env-vars
+    requires:
+      vars: ["cluster"]
+    vars:
+      dest:
+        sh: kubectl --context {{.cluster}} get pvc --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}'
+      cache:
+        sh: kubectl --context {{.cluster}} get pvc --all-namespaces --no-headers | grep "dst-cache" | awk '{print $1 "/" $2}'
+      snaps:
+        sh: kubectl --context {{.cluster}} get volumesnapshot --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}'
+
+  # Suspend the Flux ks and hr
+  .suspend:
+    internal: true
+    cmds:
+      - flux --context {{.cluster}} -n flux-system suspend kustomization {{.app}}
+      - flux --context {{.cluster}} -n {{.ns}} suspend helmrelease {{.app}}
+      - kubectl --context {{.cluster}} -n {{.ns}} scale {{.controller}} --replicas 0
+      - kubectl --context {{.cluster}} -n {{.ns}} wait pod --for delete --selector="app.kubernetes.io/name={{.app}}" --timeout=2m
+    env: *env-vars
+
+  # Wipe the PVC of all data
+  .wipe:
+    internal: true
+    cmds:
+      - $GOPATH/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/wipe.tmpl.yaml) | kubectl --context {{.cluster}} apply -f -
+      - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
+      - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
+      - kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container main
+      - kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
+    env: *env-vars
+    vars:
+      job: volsync-wipe-{{.app}}
+    preconditions:
+      - test -f $GOPATH/bin/envsubst
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/wipe.tmpl.yaml
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
+
+  # Create VolSync replicationdestination CR to restore data
+  .restore:
+    internal: true
+    cmds:
+      - $GOPATH/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.tmpl.yaml) | kubectl --context {{.cluster}} apply -f -
+      - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
+      - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
+      - kubectl --context {{.cluster}} -n {{.ns}} delete replicationdestination {{.job}}
+    env: *env-vars
+    vars:
+      job: volsync-dst-{{.app}}
+    preconditions:
+      - test -f $GOPATH/bin/envsubst
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.tmpl.yaml
+      - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
+
+  # Resume Flux ks and hr
+  .resume:
+    internal: true
+    cmds:
+      - flux --context {{.cluster}} -n {{.ns}} resume helmrelease {{.app}}
+      - flux --context {{.cluster}} -n flux-system resume kustomization {{.app}}
+    env: *env-vars
diff --git a/.taskfiles/VolSync/ListJob.tmpl.yaml b/.taskfiles/volsync/resources/list.tmpl.yaml
similarity index 61%
rename from .taskfiles/VolSync/ListJob.tmpl.yaml
rename to .taskfiles/volsync/resources/list.tmpl.yaml
index 0d63998..a5b08eb 100644
--- a/.taskfiles/VolSync/ListJob.tmpl.yaml
+++ b/.taskfiles/volsync/resources/list.tmpl.yaml
@@ -2,8 +2,8 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: "list-${rsrc}-${ts}"
-  namespace: "${namespace}"
+  name: ${job}
+  namespace: ${ns}
 spec:
   ttlSecondsAfterFinished: 3600
   template:
@@ -11,9 +11,10 @@ spec:
       automountServiceAccountToken: false
       restartPolicy: OnFailure
      containers:
-        - name: list
-          image: docker.io/restic/restic:0.16.0
+        - name: main
+          image: docker.io/restic/restic:latest
           args: ["snapshots"]
           envFrom:
             - secretRef:
-                name: "${rsrc}-restic-secret"
+                name: ${app}-volsync-secret
+          resources: {}
diff --git a/.taskfiles/VolSync/ReplicationDestination.tmpl.yaml b/.taskfiles/volsync/resources/replicationdestination.tmpl.yaml
similarity index 68%
rename from .taskfiles/VolSync/ReplicationDestination.tmpl.yaml
rename to .taskfiles/volsync/resources/replicationdestination.tmpl.yaml
index 0f7e02c..92d266b 100644
--- a/.taskfiles/VolSync/ReplicationDestination.tmpl.yaml
+++ b/.taskfiles/volsync/resources/replicationdestination.tmpl.yaml
@@ -2,16 +2,18 @@
 apiVersion: volsync.backube/v1alpha1
 kind: ReplicationDestination
 metadata:
-  name: "${rsrc}-${claim}-${ts}"
-  namespace: "${namespace}"
+  name: ${job}
+  namespace: ${ns}
 spec:
   trigger:
     manual: restore-once
   restic:
-    repository: "${rsrc}-restic-secret"
-    destinationPVC: "${claim}"
+    repository: ${app}-volsync-secret
+    destinationPVC: ${claim}
     copyMethod: Direct
-    storageClassName: openebs-zfs
+    storageClassName: ceph-block
+    # storageClassName: ceph-filesystem
+    # accessModes: ["ReadWriteMany"]
     # IMPORTANT NOTE:
     #   Set to the last X number of snapshots to restore from
     previous: ${previous}
@@ -23,3 +25,7 @@ spec:
     # Do not restore snapshots made after the following RFC3339 Timestamp.
     # date --rfc-3339=seconds (--utc)
     # restoreAsOf: "2022-12-10T16:00:00-05:00"
+    moverSecurityContext:
+      runAsUser: ${puid}
+      runAsGroup: ${pgid}
+      fsGroup: ${pgid}
diff --git a/.taskfiles/volsync/resources/unlock.tmpl.yaml b/.taskfiles/volsync/resources/unlock.tmpl.yaml
new file mode 100644
index 0000000..7afc697
--- /dev/null
+++ b/.taskfiles/volsync/resources/unlock.tmpl.yaml
@@ -0,0 +1,27 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: ${job}
+  namespace: ${ns}
+spec:
+  ttlSecondsAfterFinished: 3600
+  template:
+    spec:
+      automountServiceAccountToken: false
+      restartPolicy: OnFailure
+      containers:
+        - name: minio
+          image: docker.io/restic/restic:latest
+          args: ["unlock", "--remove-all"]
+          envFrom:
+            - secretRef:
+                name: ${app}-volsync-secret
+          resources: {}
+        - name: r2
+          image: docker.io/restic/restic:latest
+          args: ["unlock", "--remove-all"]
+          envFrom:
+            - secretRef:
+                name: ${app}-volsync-r2-secret
+          resources: {}
diff --git a/.taskfiles/volsync/resources/wait-for-job.sh b/.taskfiles/volsync/resources/wait-for-job.sh
new file mode 100644
index 0000000..aaf6d17
--- /dev/null
+++ b/.taskfiles/volsync/resources/wait-for-job.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+JOB=$1
+NAMESPACE="${2:-default}"
+CLUSTER="${3:-homelab}"
+
+[[ -z "${JOB}" ]] && echo "Job name not specified" && exit 1
+while true; do
+    STATUS="$(kubectl --context "${CLUSTER}" -n "${NAMESPACE}" get pod -l job-name="${JOB}" -o jsonpath='{.items[*].status.phase}')"
+    if [ -n "${STATUS}" ]; then # break once the pod exists in any phase, not only Pending
+        break
+    fi
+    sleep 1
+done
diff --git a/.taskfiles/volsync/resources/which-controller.sh b/.taskfiles/volsync/resources/which-controller.sh
new file mode 100644
index 0000000..d11b6b2
--- /dev/null
+++ b/.taskfiles/volsync/resources/which-controller.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+APP=$1
+NAMESPACE="${2:-default}"
+CLUSTER="${3:-homelab}"
+
+is_deployment() {
+    kubectl --context "${CLUSTER}" -n "${NAMESPACE}" get deployment "${APP}" >/dev/null 2>&1
+}
+
+is_statefulset() {
+    kubectl --context "${CLUSTER}" -n "${NAMESPACE}" get statefulset "${APP}" >/dev/null 2>&1
+}
+
+if is_deployment; then
+    echo "deployment.apps/${APP}"
+elif is_statefulset; then
+    echo "statefulset.apps/${APP}"
+else
+    echo "No deployment or statefulset found for ${APP}"
+    exit 1
+fi
diff --git a/.taskfiles/VolSync/WipeJob.tmpl.yaml b/.taskfiles/volsync/resources/wipe.tmpl.yaml
similarity index 72%
rename from .taskfiles/VolSync/WipeJob.tmpl.yaml
rename to .taskfiles/volsync/resources/wipe.tmpl.yaml
index eb878b0..ffc1cc7 100644
--- a/.taskfiles/VolSync/WipeJob.tmpl.yaml
+++ b/.taskfiles/volsync/resources/wipe.tmpl.yaml
@@ -2,8 +2,8 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: "wipe-${rsrc}-${claim}-${ts}"
-  namespace: "${namespace}"
+  name: ${job}
+  namespace: ${ns}
 spec:
   ttlSecondsAfterFinished: 3600
   template:
@@ -11,15 +11,16 @@ spec:
       automountServiceAccountToken: false
       restartPolicy: OnFailure
      containers:
-        - name: wipe
-          image: public.ecr.aws/docker/library/busybox:latest
+        - name: main
+          image: docker.io/library/alpine:latest
           command: ["/bin/sh", "-c", "cd /config; find . -delete"]
-delete"] volumeMounts: - name: config mountPath: /config securityContext: privileged: true + resources: {} volumes: - name: config persistentVolumeClaim: - claimName: "${claim}" + claimName: ${claim} diff --git a/Taskfile.yaml b/Taskfile.yaml index 7bcc27a..3bb0a73 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -13,108 +13,12 @@ env: K8S_AUTH_KUBECONFIG: "{{.ROOT_DIR}}/kubeconfig" includes: - volsync: .taskfiles/VolSync/Tasks.yaml - precommit: .taskfiles/PreCommit/Tasks.yaml - k8s: .taskfiles/k8s/Taskfile.yaml - rook: - taskfile: ".taskfiles/rook" - dir: .taskfiles/rook - flux: - dir: .taskfiles/flux - taskfile: .taskfiles/flux - talos: - taskfile: ".taskfiles/talos" - dir: .taskfiles/talos + volsync: .taskfiles/volsync + precommit: .taskfiles/precommit + k8s: .taskfiles/k8s + flux: .taskfiles/flux + talos: .taskfiles/talos tasks: default: - silent: true cmds: ["task -l"] - - init: - desc: Initialize workstation dependencies with Brew - cmds: - - brew install {{.DEPS}} {{.CLI_ARGS}} - preconditions: - - sh: command -v brew - msg: | - Homebrew is not installed. Using MacOS, Linux or WSL? - Head over to https://brew.sh to get up and running. - vars: - DEPS: >- - age - ansible - cilium-cli - direnv - derailed/k9s/k9s - fluxcd/tap/flux - go-task/tap/go-task - helm - ipcalc - jq - kubernetes-cli - kustomize - pre-commit - prettier - shellcheck - sops - stern - talhelper - yamllint - yq - - sync-secrets: - desc: Sync ExternalSecret resources - vars: - secret: '{{ .secret | default ""}}' - namespace: '{{.namespace | default "default"}}' - cmd: | - {{if eq .secret ""}} - kubectl get externalsecret.external-secrets.io --all-namespaces --no-headers -A | awk '{print $1, $2}' \ - | xargs --max-procs=4 -l bash -c 'kubectl -n $0 annotate externalsecret.external-secrets.io $1 force-sync=$(date +%s) --overwrite' - {{else}} - kubectl -n {{.namespace}} annotate externalsecret.external-secrets.io {{.secret}} force-sync=$(date +%s) --overwrite - {{end}} - preconditions: - - kubectl -n {{.namespace}} get externalsecret {{.secret}} - - mount-volume: - desc: Mount a PersistentVolumeClaim to a temporary pod - interactive: true - vars: - claim: '{{ or .claim (fail "PersistentVolumeClaim `claim` is required") }}' - namespace: '{{.namespace | default "default"}}' - cmd: | - kubectl run -n {{.namespace}} debug-{{.claim}} -i --tty --rm --image=null --privileged --overrides=' - { - "apiVersion": "v1", - "spec": { - "containers": [ - { - "name": "debug", - "image": "docker.io/library/alpine:3.20.0", - "command": ["/bin/ash"], - "stdin": true, - "stdinOnce": true, - "tty": true, - "volumeMounts": [ - { - "name": "config", - "mountPath": "/config" - } - ] - } - ], - "volumes": [ - { - "name": "config", - "persistentVolumeClaim": { - "claimName": "{{.claim}}" - } - } - ], - "restartPolicy": "Never" - } - }' - preconditions: - - kubectl -n {{.namespace}} get pvc {{.claim}} diff --git a/shell.nix b/shell.nix index b1a5e94..9649232 100644 --- a/shell.nix +++ b/shell.nix @@ -15,4 +15,25 @@ pkgs.mkShell { kubevirt fluxcd ]; + # Possible inputs needed. Keeping here for posterity + # age + # ansible + # cilium-cli + # direnv + # derailed/k9s/k9s + # fluxcd/tap/flux + # go-task/tap/go-task + # helm + # ipcalc + # jq + # kubernetes-cli + # kustomize + # pre-commit + # prettier + # shellcheck + # sops + # stern + # talhelper + # yamllint + # yq }