theshire/.taskfiles/VolSync/Tasks.yaml

158 lines
7 KiB
YAML

---
version: "3"
x-task-vars: &task-vars
rsrc: '{{.rsrc}}'
controller: '{{.controller}}'
namespace: '{{.namespace}}'
claim: '{{.claim}}'
ts: '{{.ts}}'
kustomization: '{{.kustomization}}'
previous: '{{.previous}}'
vars:
destinationTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/ReplicationDestination.tmpl.yaml"
wipeJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/WipeJob.tmpl.yaml"
waitForJobScript: "{{.ROOT_DIR}}/.taskfiles/VolSync/wait-for-job.sh"
listJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/ListJob.tmpl.yaml"
unlockJobTemplate: "{{.ROOT_DIR}}/.taskfiles/VolSync/UnlockJob.tmpl.yaml"
ts: '{{now | date "150405"}}'
tasks:
list:
desc: List all snapshots taken by restic for a given ReplicationSource (ex. task volsync:list rsrc=plex [namespace=default])
silent: true
cmds:
- envsubst < <(cat {{.listJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} list-{{.rsrc}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/list-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m
- kubectl -n {{.namespace}} logs job/list-{{.rsrc}}-{{.ts}} --container list
- kubectl -n {{.namespace}} delete job list-{{.rsrc}}-{{.ts}}
vars:
rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
env: *task-vars
preconditions:
# - sh: test -f {{.waitForJobScript}}
- sh: test -f {{.listJobTemplate}}
unlock:
desc: Unlocks restic repository for a given ReplicationSource (ex. task volsync:unlock rsrc=plex [namespace=default])
silent: true
cmds:
- envsubst < <(cat {{.unlockJobTemplate}}) | kubectl apply -f -
# - bash {{.waitForJobScript}} unlock-{{.rsrc}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/unlock-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m
- kubectl -n {{.namespace}} logs job/unlock-{{.rsrc}}-{{.ts}} --container unlock
- kubectl -n {{.namespace}} delete job unlock-{{.rsrc}}-{{.ts}}
vars:
rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
env: *task-vars
preconditions:
# - sh: test -f {{.waitForJobScript}}
- sh: test -f {{.unlockJobTemplate}}
# To run backup jobs in parallel for all replicationsources:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot rsrc=$0 namespace=$1'
#
snapshot:
desc: Trigger a Restic ReplicationSource snapshot (ex. task volsync:snapshot rsrc=plex [namespace=default])
cmds:
- kubectl -n {{.namespace}} patch replicationsources {{.rsrc}} --type merge -p '{"spec":{"trigger":{"manual":"{{.ts}}"}}}'
- bash {{.waitForJobScript}} volsync-src-{{.rsrc}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/volsync-src-{{.rsrc}} --for condition=complete --timeout=120m
# TODO: Find a way to output logs
# Error from server (NotFound): jobs.batch "volsync-src-zzztest" not found
# - kubectl -n {{.namespace}} logs job/volsync-src-{{.rsrc}}
vars:
rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
env: *task-vars
preconditions:
# - sh: test -f {{.waitForJobScript}}
- sh: kubectl -n {{.namespace}} get replicationsources {{.rsrc}}
msg: "ReplicationSource '{{.rsrc}}' not found in namespace '{{.namespace}}'"
# To run restore jobs in parallel for all replicationdestinations:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=2 -l bash -c 'task volsync:restore rsrc=$0 namespace=$1'
#
restore:
desc: Trigger a Restic ReplicationSource restore (ex. task volsync:restore rsrc=plex [namespace=default])
cmds:
- task: restore-suspend-app
vars: *task-vars
- task: restore-wipe-job
vars: *task-vars
- task: restore-volsync-job
vars: *task-vars
- task: restore-resume-app
vars: *task-vars
vars:
rsrc: '{{ or .rsrc (fail "Variable `rsrc` is required") }}'
namespace: '{{.namespace | default "default"}}'
# 1) Query to find the Flux Kustomization associated with the ReplicationSource (rsrc)
kustomization:
sh: |
kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
-o jsonpath="{.metadata.labels.kustomize\.toolkit\.fluxcd\.io/name}"
# 2) Query to find the Claim associated with the ReplicationSource (rsrc)
claim:
sh: |
kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
-o jsonpath="{.spec.sourcePVC}"
# 3) Query to find the controller associated with the PersistentVolumeClaim (claim)
controller:
sh: |
app=$(kubectl -n {{.namespace}} get persistentvolumeclaim {{.claim}} -o jsonpath="{.metadata.labels.app\.kubernetes\.io/name}")
if kubectl -n {{ .namespace }} get deployment.apps/$app >/dev/null 2>&1 ; then
echo "deployment.apps/$app"
else
echo "statefulset.apps/$app"
fi
previous: "{{.previous | default 2}}"
env: *task-vars
preconditions:
- sh: test -f {{.wipeJobTemplate}}
- sh: test -f {{.destinationTemplate}}
# - sh: test -f {{.waitForJobScript}}
# Suspend the Flux ks and hr
restore-suspend-app:
internal: true
cmds:
- flux -n flux-system suspend kustomization {{.kustomization}}
- flux -n {{.namespace}} suspend helmrelease {{.rsrc}}
- kubectl -n {{.namespace}} scale {{.controller}} --replicas 0
- kubectl -n {{.namespace}} wait pod --for delete --selector="app.kubernetes.io/name={{.rsrc}}" --timeout=2m
env: *task-vars
# Wipe the PVC of all data
restore-wipe-job:
internal: true
cmds:
- envsubst < <(cat {{.wipeJobTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} wipe-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
- kubectl -n {{.namespace}} logs job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --container wipe
- kubectl -n {{.namespace}} delete job wipe-{{.rsrc}}-{{.claim}}-{{.ts}}
env: *task-vars
# Create VolSync replicationdestination CR to restore data
restore-volsync-job:
internal: true
cmds:
- envsubst < <(cat {{.destinationTemplate}}) | kubectl apply -f -
- bash {{.waitForJobScript}} volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
- kubectl -n {{.namespace}} wait job/volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
- kubectl -n {{.namespace}} delete replicationdestination {{.rsrc}}-{{.claim}}-{{.ts}}
env: *task-vars
# Resume Flux ks and hr
restore-resume-app:
internal: true
cmds:
- flux -n {{.namespace}} resume helmrelease {{.rsrc}}
- flux -n flux-system resume kustomization {{.kustomization}}
env: *task-vars