theshire/.taskfiles/volsync/Taskfile.yaml
Joseph Hanson d9ff973a55
moving to the shire
Expanding from 1 node to 6 + 2 VMs with GPUs
2024-09-04 13:35:14 -05:00

222 lines
9.7 KiB
YAML

---
# yaml-language-server: $schema=https://taskfile.dev/schema.json
version: "3"
# This taskfile is used to manage certain VolSync tasks for a given application, limitations are described below.
# 1. Fluxtomization, HelmRelease, PVC, ReplicationSource all have the same name (e.g. plex)
# 2. ReplicationSource and ReplicationDestination are a Restic repository
# 3. Applications are deployed as either a Kubernetes Deployment or StatefulSet
# 4. Each application only has one PVC that is being replicated
x-env-vars: &env-vars
app: "{{.app}}"
claim: "{{.claim}}"
controller: "{{.controller}}"
job: "{{.job}}"
ns: "{{.ns}}"
pgid: "{{.pgid}}"
previous: "{{.previous}}"
puid: "{{.puid}}"
vars:
VOLSYNC_RESOURCES_DIR: "{{.ROOT_DIR}}/.taskfiles/volsync/resources"
tasks:
state-*:
desc: Suspend or Resume Volsync
summary: |
state: resume or suspend (required)
dotenv: ['{{.VOLSYNC_RESOURCES_DIR}}/.env']
cmds:
- flux --context $CLUSTER {{.state}} kustomization volsync
- flux --context $CLUSTER -n {{.ns}} {{.state}} helmrelease volsync
- kubectl --context $CLUSTER -n {{.ns}} scale deployment volsync --replicas {{if eq "suspend" .state}}0{{else}}1{{end}}
env: *env-vars
vars:
ns: '{{.ns | default "volsync-system"}}'
state: '{{index .MATCH 0}}'
list:
desc: List snapshots for an application
summary: |
ns: Namespace the PVC is in (default: default)
app: Application to list snapshots for (required)
dotenv: ['{{.VOLSYNC_RESOURCES_DIR}}/.env']
cmds:
- /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/list.tmpl.yaml) | kubectl --context $CLUSTER apply -f -
- bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} $CLUSTER
- kubectl --context $CLUSTER -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m
- kubectl --context $CLUSTER -n {{.ns}} logs job/{{.job}} --container main
- kubectl --context $CLUSTER -n {{.ns}} delete job {{.job}}
env: *env-vars
requires:
vars: ["app"]
vars:
ns: '{{.ns | default "default"}}'
job: volsync-list-{{.app}}
preconditions:
- test -f /etc/profiles/per-user/jahanson/bin/envsubst
- test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
- test -f {{.VOLSYNC_RESOURCES_DIR}}/list.tmpl.yaml
silent: true
unlock:
desc: Unlock a Restic repository for an application
summary: |
ns: Namespace the PVC is in (default: default)
app: Application to unlock (required)
dotenv: ['{{.VOLSYNC_RESOURCES_DIR}}/.env']
cmds:
- /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/unlock.tmpl.yaml) | kubectl --context $CLUSTER apply -f -
- bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} $CLUSTER
- kubectl --context $CLUSTER -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m
- kubectl --context $CLUSTER -n {{.ns}} logs job/{{.job}} --container minio
- kubectl --context $CLUSTER -n {{.ns}} logs job/{{.job}} --container r2
- kubectl --context $CLUSTER -n {{.ns}} delete job {{.job}}
env: *env-vars
requires:
vars: ["app"]
vars:
ns: '{{.ns | default "default"}}'
job: volsync-unlock-{{.app}}
preconditions:
- test -f /etc/profiles/per-user/jahanson/bin/envsubst
- test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
- test -f {{.VOLSYNC_RESOURCES_DIR}}/unlock.tmpl.yaml
silent: true
# To run backup jobs in parallel for all replicationsources:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:snapshot app=$0 ns=$1'
snapshot:
desc: Snapshot a PVC for an application
summary: |
cluster: Cluster to run command against (required)
ns: Namespace the PVC is in (default: default)
app: Application to snapshot (required)
cmds:
- kubectl --context {{.cluster}} -n {{.ns}} patch replicationsources {{.app}} --type merge -p '{"spec":{"trigger":{"manual":"{{.now}}"}}}'
- bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
env: *env-vars
requires:
vars: ["cluster", "app"]
vars:
now: '{{now | date "150405"}}'
ns: '{{.ns | default "default"}}'
job: volsync-src-{{.app}}
controller:
sh: true && {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh {{.app}} {{.ns}} {{.cluster}}
preconditions:
- test -f {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh
- test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
- kubectl --context {{.cluster}} -n {{.ns}} get replicationsources {{.app}}
# To run restore jobs in parallel for all replicationdestinations:
# - kubectl get replicationsources --all-namespaces --no-headers | awk '{print $2, $1}' | xargs --max-procs=4 -l bash -c 'task volsync:restore app=$0 ns=$1'
restore:
desc: Restore a PVC for an application
summary: |
cluster: Cluster to run command against (required)
ns: Namespace the PVC is in (default: default)
app: Application to restore (required)
previous: Previous number of snapshots to restore (default: 2)
cmds:
- { task: .suspend, vars: *env-vars }
- { task: .wipe, vars: *env-vars }
- { task: .restore, vars: *env-vars }
- { task: .resume, vars: *env-vars }
env: *env-vars
requires:
vars: ["cluster", "app"]
vars:
ns: '{{.ns | default "default"}}'
previous: '{{.previous | default 2}}'
controller:
sh: "{{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh {{.app}} {{.ns}}"
claim:
sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.sourcePVC}"
puid:
sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsUser}"
pgid:
sh: kubectl --context {{.cluster}} -n {{.ns}} get replicationsources/{{.app}} -o jsonpath="{.spec.restic.moverSecurityContext.runAsGroup}"
preconditions:
- test -f {{.VOLSYNC_RESOURCES_DIR}}/which-controller.sh
cleanup:
desc: Delete volume populator PVCs in all namespaces
summary: |
cluster: Cluster to run command against (required)
cmds:
- for: { var: dest }
cmd: |
{{- $items := (split "/" .ITEM) }}
kubectl --context {{.cluster}} delete pvc -n {{ $items._0 }} {{ $items._1 }}
- for: { var: cache }
cmd: |
{{- $items := (split "/" .ITEM) }}
kubectl --context {{.cluster}} delete pvc -n {{ $items._0 }} {{ $items._1 }}
- for: { var: snaps }
cmd: |
{{- $items := (split "/" .ITEM) }}
kubectl --context {{.cluster}} delete volumesnapshot -n {{ $items._0 }} {{ $items._1 }}
env: *env-vars
requires:
vars: ["cluster"]
vars:
dest:
sh: kubectl --context {{.cluster}} get pvc --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}'
cache:
sh: kubectl --context {{.cluster}} get pvc --all-namespaces --no-headers | grep "dst-cache" | awk '{print $1 "/" $2}'
snaps:
sh: kubectl --context {{.cluster}} get volumesnapshot --all-namespaces --no-headers | grep "dst-dest" | awk '{print $1 "/" $2}'
# Suspend the Flux ks and hr
.suspend:
internal: true
cmds:
- flux --context {{.cluster}} -n flux-system suspend kustomization {{.app}}
- flux --context {{.cluster}} -n {{.ns}} suspend helmrelease {{.app}}
- kubectl --context {{.cluster}} -n {{.ns}} scale {{.controller}} --replicas 0
- kubectl --context {{.cluster}} -n {{.ns}} wait pod --for delete --selector="app.kubernetes.io/name={{.app}}" --timeout=2m
env: *env-vars
# Wipe the PVC of all data
.wipe:
internal: true
cmds:
- /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/wipe.tmpl.yaml) | kubectl --context {{.cluster}} apply -f -
- bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
- kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container main
- kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
env: *env-vars
vars:
job: volsync-wipe-{{.app}}
preconditions:
- test -f /etc/profiles/per-user/jahanson/bin/envsubst
- test -f {{.VOLSYNC_RESOURCES_DIR}}/wipe.tmpl.yaml
- test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
# Create VolSync replicationdestination CR to restore data
.restore:
internal: true
cmds:
- /etc/profiles/per-user/jahanson/bin/envsubst < <(cat {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.tmpl.yaml) | kubectl --context {{.cluster}} apply -f -
- bash {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
- kubectl --context {{.cluster}} -n {{.ns}} delete replicationdestination {{.job}}
env: *env-vars
vars:
job: volsync-dst-{{.app}}
preconditions:
- test -f /etc/profiles/per-user/jahanson/bin/envsubst
- test -f {{.VOLSYNC_RESOURCES_DIR}}/replicationdestination.tmpl.yaml
- test -f {{.VOLSYNC_RESOURCES_DIR}}/wait-for-job.sh
# Resume Flux ks and hr
.resume:
internal: true
cmds:
- flux --context {{.cluster}} -n {{.ns}} resume helmrelease {{.app}}
- flux --context {{.cluster}} -n flux-system resume kustomization {{.app}}
env: *env-vars