diff --git a/.taskfiles/volsync/Taskfile.yaml b/.taskfiles/volsync/Taskfile.yaml index aff33358..c910a69d 100644 --- a/.taskfiles/volsync/Taskfile.yaml +++ b/.taskfiles/volsync/Taskfile.yaml @@ -1,6 +1,6 @@ --- # yaml-language-server: $schema=https://taskfile.dev/schema.json -version: "3" +version: '3' # This taskfile is used to manage certain VolSync tasks for a given application, limitations are described below. # 1. Fluxtomization, HelmRelease, PVC, ReplicationSource all have the same name (e.g. plex) @@ -8,215 +8,129 @@ version: "3" # 3. Applications are deployed as either a Kubernetes Deployment or StatefulSet # 4. Each application only has one PVC that is being replicated -x-env-vars: &env-vars - app: "{{.app}}" - claim: "{{.claim}}" - controller: "{{.controller}}" - job: "{{.job}}" - ns: "{{.ns}}" - pgid: "{{.pgid}}" - previous: "{{.previous}}" - puid: "{{.puid}}" - vars: - VOLSYNC_RESOURCES_DIR: "{{.ROOT_DIR}}/.taskfiles/volsync/resources" + VOLSYNC_RESOURCES_DIR: '{{.ROOT_DIR}}/.taskfiles/volsync/resources' tasks: + state-*: desc: Suspend or Resume Volsync - summary: | - state: resume or suspend (required) - dotenv: ['{{.VOLSYNC_RESOURCES_DIR}}/.env'] + summary: |- + CLUSTER: Cluster to run command against (default: main) + STATE: resume or suspend (required) cmds: - - flux --context $CLUSTER {{.state}} kustomization volsync - - flux --context $CLUSTER -n {{.ns}} {{.state}} helmrelease volsync - - kubectl --context $CLUSTER -n {{.ns}} scale deployment volsync --replicas {{if eq "suspend" .state}}0{{else}}1{{end}} - env: *env-vars + # - until kubectl wait jobs --all --all-namespaces --for=condition=complete --timeout=5m &>/dev/null; do sleep 5; done + - flux {{.STATE}} kustomization volsync + - flux --namespace {{.NS}} {{.STATE}} helmrelease volsync + - kubectl --namespace {{.NS}} scale deployment --all --replicas {{if eq .STATE "suspend"}}0{{else}}1{{end}} vars: - ns: '{{.ns | default "volsync-system"}}' - state: '{{index .MATCH 0}}' - - list: - desc: List snapshots for an application - summary: | - ns: Namespace the PVC is in (default: default) - app: Application to list snapshots for (required) - dotenv: ['{{.VOLSYNC_RESOURCES_DIR}}/.env'] - cmds: - - /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/list.tmpl.yaml) | kubectl --context $CLUSTER apply -f - - - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} $CLUSTER - - kubectl --context $CLUSTER -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m - - kubectl --context $CLUSTER -n {{.ns}} logs job/{{.job}} --container main - - kubectl --context $CLUSTER -n {{.ns}} delete job {{.job}} - env: *env-vars + NS: '{{.NS | default "volsync-system"}}' + STATE: '{{index .MATCH 0}}' requires: - vars: ["app"] - vars: - ns: '{{.ns | default "default"}}' - job: volsync-list-{{.app}} - preconditions: - - test -f /etc/profiles/per-user/jahanson/bin/envsubst - - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh - - test -f {{.VOLSYNC_RESOURCES_DIR}}/list.tmpl.yaml - silent: true + vars: [CLUSTER] unlock: - desc: Unlock a Restic repository for an application - summary: | - ns: Namespace the PVC is in (default: default) - app: Application to unlock (required) - dotenv: ['{{.VOLSYNC_RESOURCES_DIR}}/.env'] - cmds: - - /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/unlock.tmpl.yaml) | kubectl --context $CLUSTER apply -f - - - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} $CLUSTER - - kubectl --context $CLUSTER -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m - - kubectl --context $CLUSTER -n {{.ns}} logs job/{{.job}} --container minio - - kubectl --context $CLUSTER -n {{.ns}} logs job/{{.job}} --container r2 - - kubectl --context $CLUSTER -n {{.ns}} delete job {{.job}} - env: *env-vars + desc: Unlock all Restic repositories + summary: |- + CLUSTER: Cluster to run command against (default: main) + cmd: > + kubectl get replicationsources --all-namespaces --no-headers -A | awk '{print $1, $2}' + | xargs --max-procs=2 -l bash -c 'kubectl --namespace "$0" patch --field-manager=flux-client-side-apply replicationsources "$1" --type merge --patch "{\"spec\":{\"restic\":{\"unlock\":\"{{now | unixEpoch}}\"}}}"' requires: - vars: ["app"] - vars: - ns: '{{.ns | default "default"}}' - job: volsync-unlock-{{.app}} - preconditions: - - test -f /etc/profiles/per-user/jahanson/bin/envsubst - - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh - - test -f {{.VOLSYNC_RESOURCES_DIR}}/unlock.tmpl.yaml - silent: true + vars: [CLUSTER] # To run backup jobs in parallel for all replicationsources: - # - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot app=$0 ns=$1' + # - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot APP=$0 NS=$1' snapshot: - desc: Snapshot a PVC for an application - summary: | - cluster: Cluster to run command against (required) - ns: Namespace the PVC is in (default: default) - app: Application to snapshot (required) + desc: Snapshot an application + summary: |- + CLUSTER: Cluster to run command against (default: main) + NS: Namespace the application is in (default: default) + APP: Application to snapshot (required) cmds: - - kubectl --context {{.cluster}} -n {{.ns}} patch replicationsources {{.app}} --type merge -p '{"spec":{"trigger":{"manual":"{{.now}}"}}}' - - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}} - - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m - env: *env-vars - requires: - vars: ["cluster", "app"] + - kubectl --namespace {{.NS}} patch replicationsources {{.APP}} --type merge -p '{"spec":{"trigger":{"manual":"{{now | unixEpoch}}"}}}' + - until kubectl --namespace {{.NS}} get job/{{.JOB}} &>/dev/null; do sleep 5; done + - kubectl --namespace {{.NS}} wait job/{{.JOB}} --for=condition=complete --timeout=120m vars: - now: '{{now | date "150405"}}' - ns: '{{.ns | default "default"}}' - job: volsync-src-{{.app}} - controller: - sh: true && {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh {{.app}} {{.ns}} {{.cluster}} + NS: '{{.NS | default "default"}}' + JOB: volsync-src-{{.APP}} + requires: + vars: [CLUSTER, APP] preconditions: - - test -f {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh - - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh - - kubectl --context {{.cluster}} -n {{.ns}} get replicationsources {{.app}} + - kubectl --namespace {{.NS}} get replicationsources {{.APP}} # To run restore jobs in parallel for all replicationdestinations: - # - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:restore app=$0 ns=$1' + # - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:restore APP=$0 NS=$1' restore: - desc: Restore a PVC for an application - summary: | - cluster: Cluster to run command against (required) - ns: Namespace the PVC is in (default: default) - app: Application to restore (required) - previous: Previous number of snapshots to restore (default: 2) + desc: Restore an application + summary: |- + CLUSTER: Cluster to run command against (default: main) + NS: Namespace the application is in (default: default) + APP: Application to restore (required) + PREVIOUS: Previous number of snapshots to restore (default: 2) cmds: - - { task: .suspend, vars: *env-vars } - - { task: .wipe, vars: *env-vars } - - { task: .restore, vars: *env-vars } - - { task: .resume, vars: *env-vars } - env: *env-vars + - task: .suspend + - task: .restore + - task: .resume requires: - vars: ["cluster", "app"] - vars: - ns: '{{.ns | default "default"}}' - previous: '{{.previous | default 2}}' - controller: - sh: "{{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh {{.app}} {{.ns}}" - claim: - sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.sourcePVC}" - puid: - sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsUser}" - pgid: - sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsGroup}" - preconditions: - - test -f {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh + vars: [CLUSTER, APP] - cleanup: - desc: Delete volume populator PVCs in all namespaces - summary: | - cluster: Cluster to run command against (required) - cmds: - - for: { var: dest } - cmd: | - {{- $items := (split "/" .ITEM) }} - kubectl --context {{.cluster}} delete pvc -n {{ $items._0 }} {{ $items._1 }} - - for: { var: cache } - cmd: | - {{- $items := (split "/" .ITEM) }} - kubectl --context {{.cluster}} delete pvc -n {{ $items._0 }} {{ $items._1 }} - - for: { var: snaps } - cmd: | - {{- $items := (split "/" .ITEM) }} - kubectl --context {{.cluster}} delete volumesnapshot -n {{ $items._0 }} {{ $items._1 }} - env: *env-vars - requires: - vars: ["cluster"] - vars: - dest: - sh: kubectl --context {{.cluster}} get pvc --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}' - cache: - sh: kubectl --context {{.cluster}} get pvc --all-namespaces --no-headers | grep "dst-cache" | awk '{print $1 "/" $2}' - snaps: - sh: kubectl --context {{.cluster}} get volumesnapshot --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}' - - # Suspend the Flux ks and hr .suspend: internal: true cmds: - - flux --context {{.cluster}} -n flux-system suspend kustomization {{.app}} - - flux --context {{.cluster}} -n {{.ns}} suspend helmrelease {{.app}} - - kubectl --context {{.cluster}} -n {{.ns}} scale {{.controller}} --replicas 0 - - kubectl --context {{.cluster}} -n {{.ns}} wait pod --for delete --selector="app.kubernetes.io/name={{.app}}" --timeout=2m - env: *env-vars - - # Wipe the PVC of all data - .wipe: - internal: true - cmds: - - /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/wipe.tmpl.yaml) | kubectl --context {{.cluster}} apply -f - - - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}} - - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m - - kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container main - - kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}} - env: *env-vars + - flux --namespace flux-system suspend kustomization {{.APP}} + - flux --namespace {{.NS}} suspend helmrelease {{.APP}} + - kubectl --namespace {{.NS}} scale {{.CONTROLLER}}/{{.APP}} --replicas 0 + - kubectl --namespace {{.NS}} wait pod --for=delete --selector="app.kubernetes.io/name={{.APP}}" --timeout=5m vars: - job: volsync-wipe-{{.app}} - preconditions: - - test -f /etc/profiles/per-user/jahanson/bin/envsubst - - test -f {{.VOLSYNC_RESOURCES_DIR}}/wipe.tmpl.yaml - - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh + NS: '{{.NS | default "default"}}' + APP: '{{.APP}}' + CONTROLLER: + sh: kubectl --namespace {{.NS}} get deployment {{.APP}} &>/dev/null && echo deployment || echo statefulset - # Create VolSync replicationdestination CR to restore data .restore: internal: true cmds: - - /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.tmpl.yaml) | kubectl --context {{.cluster}} apply -f - - - bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}} - - kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m - - kubectl --context {{.cluster}} -n {{.ns}} delete replicationdestination {{.job}} - env: *env-vars + - minijinja-cli --env --trim-blocks --lstrip-blocks --autoescape=none {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.yaml.j2 | kubectl apply --server-side --filename - + - until kubectl --namespace {{.NS}} get job/{{.JOB}} &>/dev/null; do sleep 5; done + - kubectl --namespace {{.NS}} wait job/{{.JOB}} --for=condition=complete --timeout=120m + - kubectl --namespace {{.NS}} delete replicationdestination {{.JOB}} vars: - job: volsync-dst-{{.app}} + NS: '{{.NS | default "default"}}' + JOB: volsync-dst-{{.APP}} + PREVIOUS: '{{.PREVIOUS | default 2}}' + CLAIM: + sh: kubectl --namespace {{.NS}} get replicationsources/{{.APP}} --output=jsonpath="{.spec.sourcePVC}" + ACCESS_MODES: + sh: kubectl --namespace {{.NS}} get replicationsources/{{.APP}} --output=jsonpath="{.spec.restic.accessModes}" + STORAGE_CLASS_NAME: + sh: kubectl --namespace {{.NS}} get replicationsources/{{.APP}} --output=jsonpath="{.spec.restic.storageClassName}" + PUID: + sh: kubectl --namespace {{.NS}} get replicationsources/{{.APP}} --output=jsonpath="{.spec.restic.moverSecurityContext.runAsUser}" + PGID: + sh: kubectl --namespace {{.NS}} get replicationsources/{{.APP}} --output=jsonpath="{.spec.restic.moverSecurityContext.runAsGroup}" + env: + NS: '{{.NS}}' + JOB: '{{.JOB}}' + APP: '{{.APP}}' + PREVIOUS: '{{.PREVIOUS}}' + CLAIM: '{{.CLAIM}}' + ACCESS_MODES: '{{.ACCESS_MODES}}' + STORAGE_CLASS_NAME: '{{.STORAGE_CLASS_NAME}}' + PUID: '{{.PUID}}' + PGID: '{{.PGID}}' preconditions: - - test -f /etc/profiles/per-user/jahanson/bin/envsubst - - test -f {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.tmpl.yaml - - test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh + - test -f {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.yaml.j2 - # Resume Flux ks and hr .resume: internal: true cmds: - - flux --context {{.cluster}} -n {{.ns}} resume helmrelease {{.app}} - - flux --context {{.cluster}} -n flux-system resume kustomization {{.app}} - env: *env-vars + - flux --namespace {{.NS}} resume helmrelease {{.APP}} + - flux --namespace flux-system resume kustomization {{.APP}} + - kubectl --namespace {{.NS}} scale {{.CONTROLLER}}/{{.APP}} --replicas 1 + - kubectl --namespace {{.NS}} wait pod --for=condition=ready --selector="app.kubernetes.io/name={{.APP}}" --timeout=5m + vars: + NS: '{{.NS | default "default"}}' + APP: '{{.APP}}' + CONTROLLER: + sh: kubectl --namespace {{.NS}} get deployment {{.APP}} &>/dev/null && echo deployment || echo statefulset diff --git a/.taskfiles/volsync/resources/.env b/.taskfiles/volsync/resources/.env deleted file mode 100644 index 21bfcc52..00000000 --- a/.taskfiles/volsync/resources/.env +++ /dev/null @@ -1 +0,0 @@ -CLUSTER=theshire diff --git a/.taskfiles/volsync/resources/list.tmpl.yaml b/.taskfiles/volsync/resources/list.tmpl.yaml deleted file mode 100644 index e1bbc1a3..00000000 --- a/.taskfiles/volsync/resources/list.tmpl.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: ${job} - namespace: ${ns} -spec: - ttlSecondsAfterFinished: 3600 - template: - spec: - automountServiceAccountToken: false - restartPolicy: OnFailure - containers: - - name: main - image: docker.io/restic/restic:latest - args: ["snapshots"] - envFrom: - - secretRef: - name: ${app}-volsync-r2-secret - resources: {} diff --git a/.taskfiles/volsync/resources/replicationdestination.tmpl.yaml b/.taskfiles/volsync/resources/replicationdestination.tmpl.yaml deleted file mode 100644 index b560e7e3..00000000 --- a/.taskfiles/volsync/resources/replicationdestination.tmpl.yaml +++ /dev/null @@ -1,31 +0,0 @@ ---- -apiVersion: volsync.backube/v1alpha1 -kind: ReplicationDestination -metadata: - name: ${job} - namespace: ${ns} -spec: - trigger: - manual: restore-once - restic: - repository: ${app}-volsync-r2-secret - destinationPVC: ${claim} - copyMethod: Direct - storageClassName: ceph-block - # storageClassName: ceph-filesystem - # accessModes: ["ReadWriteMany"] - # IMPORTANT NOTE: - # Set to the last X number of snapshots to restore from - previous: ${previous} - # OR; - # IMPORTANT NOTE: - # On bootstrap set `restoreAsOf` to the time the old cluster was destroyed. - # This will essentially prevent volsync from trying to restore a backup - # from a application that started with default data in the PVC. - # Do not restore snapshots made after the following RFC3339 Timestamp. - # date --rfc-3339=seconds (--utc) - # restoreAsOf: "2022-12-10T16:00:00-05:00" - moverSecurityContext: - runAsUser: ${puid} - runAsGroup: ${pgid} - fsGroup: ${pgid} diff --git a/.taskfiles/volsync/resources/replicationdestination.yaml.j2 b/.taskfiles/volsync/resources/replicationdestination.yaml.j2 new file mode 100644 index 00000000..08bbc647 --- /dev/null +++ b/.taskfiles/volsync/resources/replicationdestination.yaml.j2 @@ -0,0 +1,23 @@ +--- +apiVersion: volsync.backube/v1alpha1 +kind: ReplicationDestination +metadata: + name: {{ ENV.JOB }} + namespace: {{ ENV.NS }} +spec: + trigger: + manual: restore-once + restic: + repository: {{ ENV.APP }}-volsync-secret + destinationPVC: {{ ENV.CLAIM }} + copyMethod: Direct + storageClassName: {{ ENV.STORAGE_CLASS_NAME }} + accessModes: {{ ENV.ACCESS_MODES }} + previous: {{ ENV.PREVIOUS }} + enableFileDeletion: true + cleanupCachePVC: true + cleanupTempPVC: true + moverSecurityContext: + runAsUser: {{ ENV.PUID }} + runAsGroup: {{ ENV.PGID }} + fsGroup: {{ ENV.PGID }} diff --git a/.taskfiles/volsync/resources/unlock.tmpl.yaml b/.taskfiles/volsync/resources/unlock.tmpl.yaml deleted file mode 100644 index 7afc697e..00000000 --- a/.taskfiles/volsync/resources/unlock.tmpl.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: ${job} - namespace: ${ns} -spec: - ttlSecondsAfterFinished: 3600 - template: - spec: - automountServiceAccountToken: false - restartPolicy: OnFailure - containers: - - name: minio - image: docker.io/restic/restic:latest - args: ["unlock", "--remove-all"] - envFrom: - - secretRef: - name: ${app}-volsync-secret - resources: {} - - name: r2 - image: docker.io/restic/restic:latest - args: ["unlock", "--remove-all"] - envFrom: - - secretRef: - name: ${app}-volsync-r2-secret - resources: {} diff --git a/.taskfiles/volsync/resources/wait-for-job.sh b/.taskfiles/volsync/resources/wait-for-job.sh deleted file mode 100755 index ab6bafc1..00000000 --- a/.taskfiles/volsync/resources/wait-for-job.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -JOB=$1 -NAMESPACE="${2:-default}" -CLUSTER="${3:-main}" - -[[ -z "${JOB}" ]] && echo "Job name not specified" && exit 1 -while true; do - STATUS="$(kubectl --context "${CLUSTER}" -n "${NAMESPACE}" get pod -l job-name="${JOB}" -o jsonpath='{.items[*].status.phase}')" - if [ "${STATUS}" == "Pending" ]; then - break - fi - sleep 1 -done diff --git a/.taskfiles/volsync/resources/which-controller.sh b/.taskfiles/volsync/resources/which-controller.sh deleted file mode 100755 index bcd3b28c..00000000 --- a/.taskfiles/volsync/resources/which-controller.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -APP=$1 -NAMESPACE="${2:-default}" -CLUSTER="${3:-theshire}" - -is_deployment() { - kubectl --context "${CLUSTER}" -n "${NAMESPACE}" get deployment "${APP}" >/dev/null 2>&1 -} - -is_statefulset() { - kubectl --context "${CLUSTER}" -n "${NAMESPACE}" get statefulset "${APP}" >/dev/null 2>&1 -} - -if is_deployment; then - echo "deployment.apps/${APP}" -elif is_statefulset; then - echo "statefulset.apps/${APP}" -else - echo "No deployment or statefulset found for ${APP}" - exit 1 -fi diff --git a/.taskfiles/volsync/resources/wipe.tmpl.yaml b/.taskfiles/volsync/resources/wipe.tmpl.yaml deleted file mode 100644 index ffc1cc75..00000000 --- a/.taskfiles/volsync/resources/wipe.tmpl.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: ${job} - namespace: ${ns} -spec: - ttlSecondsAfterFinished: 3600 - template: - spec: - automountServiceAccountToken: false - restartPolicy: OnFailure - containers: - - name: main - image: docker.io/library/alpine:latest - command: ["/bin/sh", "-c", "cd /config; find . -delete"] - volumeMounts: - - name: config - mountPath: /config - securityContext: - privileged: true - resources: {} - volumes: - - name: config - persistentVolumeClaim: - claimName: ${claim}