# File listing metadata: 158 lines, 7 KiB, YAML
---
version: "3"

# Shared mapping, reused through the `*task-vars` alias both as the `env:` of
# tasks in this file and as the `vars:` passed to the internal restore
# sub-tasks. Each entry simply forwards the variable of the same name.
x-task-vars: &task-vars
  # Always resolvable: supplied by the caller or by a task-level default.
  rsrc: "{{.rsrc}}"
  namespace: "{{.namespace}}"
  ts: "{{.ts}}"
  # Only computed by the `restore` task.
  claim: "{{.claim}}"
  controller: "{{.controller}}"
  kustomization: "{{.kustomization}}"
  previous: "{{.previous}}"

vars:
  # Manifest templates; tasks render them with `envsubst` and pipe the result
  # to `kubectl apply`.
  listJobTemplate: '{{.ROOT_DIR}}/.taskfiles/VolSync/ListJob.tmpl.yaml'
  unlockJobTemplate: '{{.ROOT_DIR}}/.taskfiles/VolSync/UnlockJob.tmpl.yaml'
  wipeJobTemplate: '{{.ROOT_DIR}}/.taskfiles/VolSync/WipeJob.tmpl.yaml'
  destinationTemplate: '{{.ROOT_DIR}}/.taskfiles/VolSync/ReplicationDestination.tmpl.yaml'
  # Helper script, invoked by tasks as `bash <script> <job-name> <namespace>`.
  waitForJobScript: '{{.ROOT_DIR}}/.taskfiles/VolSync/wait-for-job.sh'
  # Time-of-day stamp (Go layout "150405" = HHMMSS) used as a job-name suffix
  # and as the manual trigger value for snapshots.
  ts: '{{now | date "150405"}}'

tasks:

  # Apply the list Job rendered from listJobTemplate, wait for it to complete,
  # print the logs of its `list` container, then delete the Job.
  # Required: rsrc. Optional: namespace (defaults to "default").
  list:
    desc: List all snapshots taken by restic for a given ReplicationSource (ex. task volsync:list rsrc=plex [namespace=default])
    silent: true
    cmds:
      # The task `env` below is what envsubst substitutes into the template.
      - envsubst < "{{.listJobTemplate}}" | kubectl apply -f -
      - bash "{{.waitForJobScript}}" list-{{.rsrc}}-{{.ts}} {{.namespace}}
      - kubectl -n {{.namespace}} wait job/list-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m
      - kubectl -n {{.namespace}} logs job/list-{{.rsrc}}-{{.ts}} --container list
      - kubectl -n {{.namespace}} delete job list-{{.rsrc}}-{{.ts}}
    vars:
      rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
      namespace: '{{.namespace | default "default"}}'
    env: *task-vars
    preconditions:
      - sh: test -f "{{.waitForJobScript}}"
      - sh: test -f "{{.listJobTemplate}}"

  # Same flow as `list`, but with the Job rendered from unlockJobTemplate and
  # logs read from its `unlock` container.
  # Required: rsrc. Optional: namespace (defaults to "default").
  unlock:
    desc: Unlocks restic repository for a given ReplicationSource (ex. task volsync:unlock rsrc=plex [namespace=default])
    silent: true
    cmds:
      - envsubst < "{{.unlockJobTemplate}}" | kubectl apply -f -
      - bash "{{.waitForJobScript}}" unlock-{{.rsrc}}-{{.ts}} {{.namespace}}
      - kubectl -n {{.namespace}} wait job/unlock-{{.rsrc}}-{{.ts}} --for condition=complete --timeout=1m
      - kubectl -n {{.namespace}} logs job/unlock-{{.rsrc}}-{{.ts}} --container unlock
      - kubectl -n {{.namespace}} delete job unlock-{{.rsrc}}-{{.ts}}
    vars:
      rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
      namespace: '{{.namespace | default "default"}}'
    env: *task-vars
    preconditions:
      - sh: test -f "{{.waitForJobScript}}"
      - sh: test -f "{{.unlockJobTemplate}}"

  # To run backup jobs in parallel for all replicationsources:
  #  - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot rsrc=$0 namespace=$1'
  #
  # Sets spec.trigger.manual on the ReplicationSource to the current `ts`, then
  # waits (up to 120m) for the job named volsync-src-<rsrc> to complete.
  # Required: rsrc. Optional: namespace (defaults to "default").
  snapshot:
    desc: Trigger a Restic ReplicationSource snapshot (ex. task volsync:snapshot rsrc=plex [namespace=default])
    cmds:
      - kubectl -n {{.namespace}} patch replicationsources {{.rsrc}} --type merge -p '{"spec":{"trigger":{"manual":"{{.ts}}"}}}'
      - bash "{{.waitForJobScript}}" volsync-src-{{.rsrc}} {{.namespace}}
      - kubectl -n {{.namespace}} wait job/volsync-src-{{.rsrc}} --for condition=complete --timeout=120m
      # TODO: Find a way to output logs
      # Error from server (NotFound): jobs.batch "volsync-src-zzztest" not found
      # - kubectl -n {{.namespace}} logs job/volsync-src-{{.rsrc}}
    vars:
      rsrc: '{{ or .rsrc (fail "ReplicationSource `rsrc` is required") }}'
      namespace: '{{.namespace | default "default"}}'
    env: *task-vars
    preconditions:
      - sh: test -f "{{.waitForJobScript}}"
      - sh: kubectl -n {{.namespace}} get replicationsources {{.rsrc}}
        msg: "ReplicationSource '{{.rsrc}}' not found in namespace '{{.namespace}}'"

  # To run restore jobs in parallel for all replicationdestinations:
  #   - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=2 -l bash -c 'task volsync:restore rsrc=$0 namespace=$1'
  #
  # Runs the four internal restore-* tasks in order: suspend the app, wipe the
  # claim, restore through a ReplicationDestination, resume the app.
  # Required: rsrc. Optional: namespace (defaults to "default"), previous
  # (defaults to 2; only exported through task-vars here, presumably consumed
  # by the destination template - confirm).
  # NOTE(review): there is no cleanup step, so if a sub-task fails the later
  # ones (including restore-resume-app) do not run - confirm this is intended.
  restore:
    desc: Trigger a Restic ReplicationSource restore (ex. task volsync:restore rsrc=plex [namespace=default])
    cmds:
      - task: restore-suspend-app
        vars: *task-vars
      - task: restore-wipe-job
        vars: *task-vars
      - task: restore-volsync-job
        vars: *task-vars
      - task: restore-resume-app
        vars: *task-vars
    vars:
      rsrc: '{{ or .rsrc (fail "Variable `rsrc` is required") }}'
      namespace: '{{.namespace | default "default"}}'
      # 1) Query to find the Flux Kustomization associated with the ReplicationSource (rsrc)
      kustomization:
        sh: |
          kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
            -o jsonpath="{.metadata.labels.kustomize\.toolkit\.fluxcd\.io/name}"
      # 2) Query to find the Claim associated with the ReplicationSource (rsrc)
      claim:
        sh: |
          kubectl -n {{.namespace}} get replicationsource {{.rsrc}} \
            -o jsonpath="{.spec.sourcePVC}"
      # 3) Query to find the controller associated with the PersistentVolumeClaim (claim)
      # Must stay below `claim`, which it reads. Emits deployment.apps/<app> when
      # such a Deployment exists, otherwise statefulset.apps/<app>.
      controller:
        sh: |
          app=$(kubectl -n {{.namespace}} get persistentvolumeclaim {{.claim}} -o jsonpath="{.metadata.labels.app\.kubernetes\.io/name}")
          if kubectl -n {{.namespace}} get deployment.apps/$app >/dev/null 2>&1 ; then
            echo "deployment.apps/$app"
          else
            echo "statefulset.apps/$app"
          fi
      previous: "{{.previous | default 2}}"
    env: *task-vars
    preconditions:
      - sh: test -f "{{.wipeJobTemplate}}"
      - sh: test -f "{{.destinationTemplate}}"
      - sh: test -f "{{.waitForJobScript}}"

  # Suspend the Flux ks and hr
  # Then scale the controller to 0 and wait (up to 2m) for the pods labelled
  # app.kubernetes.io/name=<rsrc> to be deleted.
  restore-suspend-app:
    internal: true
    cmds:
      - flux -n flux-system suspend kustomization {{.kustomization}}
      - flux -n {{.namespace}} suspend helmrelease {{.rsrc}}
      - kubectl -n {{.namespace}} scale {{.controller}} --replicas 0
      - kubectl -n {{.namespace}} wait pod --for delete --selector="app.kubernetes.io/name={{.rsrc}}" --timeout=2m
    env: *task-vars

  # Wipe the PVC of all data
  # Applies the Job rendered from wipeJobTemplate, waits (up to 120m) for it to
  # complete, prints the logs of its `wipe` container, then deletes the Job.
  restore-wipe-job:
    internal: true
    cmds:
      - envsubst < "{{.wipeJobTemplate}}" | kubectl apply -f -
      - bash "{{.waitForJobScript}}" wipe-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
      - kubectl -n {{.namespace}} wait job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
      - kubectl -n {{.namespace}} logs job/wipe-{{.rsrc}}-{{.claim}}-{{.ts}} --container wipe
      - kubectl -n {{.namespace}} delete job wipe-{{.rsrc}}-{{.claim}}-{{.ts}}
    env: *task-vars

  # Create VolSync replicationdestination CR to restore data
  # Waits (up to 120m) for the volsync-dst-* job to complete, then deletes the
  # ReplicationDestination again.
  restore-volsync-job:
    internal: true
    cmds:
      - envsubst < "{{.destinationTemplate}}" | kubectl apply -f -
      - bash "{{.waitForJobScript}}" volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} {{.namespace}}
      - kubectl -n {{.namespace}} wait job/volsync-dst-{{.rsrc}}-{{.claim}}-{{.ts}} --for condition=complete --timeout=120m
      - kubectl -n {{.namespace}} delete replicationdestination {{.rsrc}}-{{.claim}}-{{.ts}}
    env: *task-vars

  # Resume Flux ks and hr
  # NOTE(review): replicas are not scaled back up here; presumably the resumed
  # Flux reconciliation restores them - confirm.
  restore-resume-app:
    internal: true
    cmds:
      - flux -n {{.namespace}} resume helmrelease {{.rsrc}}
      - flux -n flux-system resume kustomization {{.kustomization}}
    env: *task-vars