add system-upgrade-controller 🤞

This commit is contained in:
Joseph Hanson 2024-10-11 05:51:48 -05:00
parent 2ca0b5805f
commit 4f2756bcd4
Signed by: jahanson
SSH key fingerprint: SHA256:vy6dKBECV522aPAwklFM3ReKAVB086rT3oWwiuiFG7o
11 changed files with 378 additions and 0 deletions

View file

@ -4,3 +4,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./remove-cpu-limits.yaml
- ./schematic-to-pod.yaml

View file

@ -0,0 +1,39 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/kyverno.io/clusterpolicy_v1.json
# Kyverno policy that acts on Pod/binding requests for pods named apply-talos*:
# it resolves the node the pod is being bound to, reads that node's
# "extensions.talos.dev/schematic" annotation through the API server, and
# writes the same annotation onto the matched resource.
apiVersion: kyverno.io/v2beta1
kind: ClusterPolicy
metadata:
  name: mutate-pod-binding
  annotations:
    # Descriptive policy metadata.
    policies.kyverno.io/title: Mutate Pod Add Schematic
    policies.kyverno.io/category: Other
    policies.kyverno.io/subject: Pod
    # Version hints.
    policies.kyverno.io/minversion: 1.10.0
    kyverno.io/kyverno-version: 1.10.0
    kyverno.io/kubernetes-version: "1.30"
    # Do not auto-generate rules for pod controllers.
    pod-policies.kyverno.io/autogen-controllers: none
spec:
  background: false
  rules:
    - name: project-foo
      match:
        any:
          - resources:
              kinds:
                - Pod/binding
              names:
                - apply-talos*
      context:
        # Name of the node the binding targets; empty string when absent.
        - name: node
          variable:
            jmesPath: request.object.target.name
            default: ""
        # Schematic annotation of that node, or the literal 'empty' when the
        # annotation cannot be found.
        - name: schematic
          apiCall:
            urlPath: "/api/v1/nodes/{{node}}"
            jmesPath: 'metadata.annotations."extensions.talos.dev/schematic" || ''empty'''
      mutate:
        patchStrategicMerge:
          metadata:
            annotations:
              extensions.talos.dev/schematic: "{{ schematic }}"

View file

@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Entry point for this directory: the namespace manifest is listed first,
# followed by the Flux Kustomizations for the system-upgrade-controller.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Pre Flux-Kustomizations
  - ./namespace.yaml
  # Flux-Kustomizations
  - ./system-upgrade-controller/ks.yaml

View file

@ -0,0 +1,38 @@
---
# Namespace that holds the system-upgrade-controller and its plans.
apiVersion: v1
kind: Namespace
metadata:
  name: system-upgrade
  annotations:
    # Tell Flux not to prune this namespace.
    kustomize.toolkit.fluxcd.io/prune: disabled
    volsync.backube/privileged-movers: "true"
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/notification.toolkit.fluxcd.io/provider_v1beta3.json
# Flux notification provider that posts alerts to the Alertmanager v2 API
# in the observability namespace.
apiVersion: notification.toolkit.fluxcd.io/v1beta3
kind: Provider
metadata:
  name: alert-manager
  namespace: system-upgrade
spec:
  type: alertmanager
  address: http://alertmanager.observability.svc.cluster.local:9093/api/v2/alerts/
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/notification.toolkit.fluxcd.io/alert_v1beta3.json
# Forwards error-severity events from every HelmRelease in this namespace to
# the alert-manager provider, dropping messages that match the exclusion list.
apiVersion: notification.toolkit.fluxcd.io/v1beta3
kind: Alert
metadata:
  name: alert-manager
  namespace: system-upgrade
spec:
  suspend: false
  providerRef:
    name: alert-manager
  eventSeverity: error
  eventSources:
    - kind: HelmRelease
      name: "*"
  # Regular expressions; events whose message matches any of them are skipped.
  exclusionList:
    - "error.*lookup github\\.com"
    - "error.*lookup raw\\.githubusercontent\\.com"
    - "dial.*tcp.*timeout"
    - "waiting.*socket"

View file

@ -0,0 +1,101 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Deploys rancher/system-upgrade-controller through the bjw-s app-template
# chart. The pod is restricted to control-plane nodes, runs as an unprivileged
# user with a read-only root filesystem, and mounts the host CA bundles
# read-only.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: &app system-upgrade-controller
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 3.5.1
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      strategy: rollback
      retries: 3
  values:
    controllers:
      system-upgrade-controller:
        strategy: RollingUpdate
        containers:
          app:
            image:
              repository: docker.io/rancher/system-upgrade-controller
              tag: v0.14.1@sha256:7e13a9b2b984f0c0fd6328439b575348723cc6954b91db3453057fcb784e2d29
            env:
              # Controller settings.
              SYSTEM_UPGRADE_CONTROLLER_DEBUG: false
              SYSTEM_UPGRADE_CONTROLLER_THREADS: 2
              # Settings applied to the upgrade jobs the controller creates.
              SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS: 900
              SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT: 99
              SYSTEM_UPGRADE_JOB_IMAGE_PULL_POLICY: IfNotPresent
              SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE: registry.k8s.io/kubectl:v1.31.1
              SYSTEM_UPGRADE_JOB_POD_REPLACEMENT_POLICY: Failed
              SYSTEM_UPGRADE_JOB_PRIVILEGED: true
              SYSTEM_UPGRADE_JOB_TTL_SECONDS_AFTER_FINISH: 900
              SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL: 15m
              # Identity: the release name, plus the pod's own namespace taken
              # from the downward API.
              SYSTEM_UPGRADE_CONTROLLER_NAME: *app
              SYSTEM_UPGRADE_CONTROLLER_NAMESPACE:
                valueFrom:
                  fieldRef:
                    fieldPath: metadata.namespace
            securityContext:
              allowPrivilegeEscalation: false
              readOnlyRootFilesystem: true
              capabilities:
                drop:
                  - ALL
              seccompProfile:
                type: RuntimeDefault
    defaultPodOptions:
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        runAsGroup: 65534
        seccompProfile:
          type: RuntimeDefault
      # Schedule only onto nodes carrying the control-plane role label.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
    serviceAccount:
      create: true
      name: system-upgrade
    persistence:
      # Writable scratch space (the root filesystem is read-only).
      tmp:
        type: emptyDir
      # Host certificate directories, mounted read-only.
      etc-ssl:
        type: hostPath
        hostPath: /etc/ssl
        hostPathType: DirectoryOrCreate
        globalMounts:
          - readOnly: true
      etc-pki:
        type: hostPath
        hostPath: /etc/pki
        hostPathType: DirectoryOrCreate
        globalMounts:
          - readOnly: true
      etc-ca-certificates:
        type: hostPath
        hostPath: /etc/ca-certificates
        hostPathType: DirectoryOrCreate
        globalMounts:
          - readOnly: true

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Bundles the controller HelmRelease together with its RBAC manifests.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrelease.yaml
  - rbac.yaml

View file

@ -0,0 +1,21 @@
---
# Binds the built-in cluster-admin ClusterRole to the system-upgrade
# ServiceAccount in the system-upgrade namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system-upgrade
subjects:
  - kind: ServiceAccount
    name: system-upgrade
    namespace: system-upgrade
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
---
# Talos API ServiceAccount requesting the os:admin role.
# NOTE(review): presumably the source of the `talos` secret that the upgrade
# plans mount under /var/run/secrets/talos.dev - confirm.
apiVersion: talos.dev/v1alpha1
kind: ServiceAccount
metadata:
  name: talos
spec:
  roles:
    - os:admin

View file

@ -0,0 +1,50 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the controller manifests (the "app"
# directory) into the system-upgrade namespace once
# node-feature-discovery-rules is ready, and waits for them to become healthy.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app system-upgrade-controller
  namespace: flux-system
spec:
  targetNamespace: system-upgrade
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  dependsOn:
    - name: node-feature-discovery-rules
  sourceRef:
    kind: GitRepository
    name: theshire
  path: ./kubernetes/apps/system-upgrade/system-upgrade-controller/app
  prune: true
  wait: true
  interval: 30m
  timeout: 5m
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the upgrade plans (the "plans" directory)
# after the controller Kustomization. The target Talos and Kubernetes versions
# are injected into the plans through post-build substitution.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: &app system-upgrade-controller-plans
  namespace: flux-system
spec:
  targetNamespace: system-upgrade
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  dependsOn:
    - name: system-upgrade-controller
  sourceRef:
    kind: GitRepository
    name: theshire
  path: ./kubernetes/apps/system-upgrade/system-upgrade-controller/plans
  prune: true
  wait: false
  interval: 30m
  timeout: 5m
  postBuild:
    substitute:
      # renovate: datasource=docker depName=ghcr.io/siderolabs/installer
      TALOS_VERSION: v1.7.6
      # renovate: datasource=docker depName=ghcr.io/siderolabs/kubelet
      KUBERNETES_VERSION: v1.30.2

View file

@ -0,0 +1,45 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/upgrade.cattle.io/plan_v1.json
# Upgrade plan for Kubernetes itself. It selects Talos control-plane nodes,
# handles one node at a time, runs a talosctl health check as the prepare
# step, and then runs talosctl upgrade-k8s to the plan's latest version.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
  name: kubernetes
spec:
  version: ${KUBERNETES_VERSION}
  serviceAccountName: system-upgrade
  concurrency: 1
  exclusive: true
  # Secret mounted into the job containers at the given path.
  secrets:
    - name: talos
      path: /var/run/secrets/talos.dev
      ignoreUpdates: true
  nodeSelector:
    matchExpressions:
      - key: feature.node.kubernetes.io/system-os_release.ID
        operator: In
        values:
          - "talos"
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
  tolerations:
    - key: CriticalAddonsOnly
      operator: Exists
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  prepare: &prepare
    image: ghcr.io/siderolabs/talosctl:${TALOS_VERSION}
    envs:
      # IP of the node the job pod is running on.
      - name: NODE_IP
        valueFrom:
          fieldRef:
            fieldPath: status.hostIP
    args:
      - --nodes=$(NODE_IP)
      - health
      - --server=false
  # Same image and envs as the prepare step; only the arguments differ.
  upgrade:
    <<: *prepare
    args:
      - --nodes=$(NODE_IP)
      - upgrade-k8s
      - --to=$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION)

View file

@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Bundles the Kubernetes and Talos upgrade plans.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./kubernetes.yaml
  - ./talos.yaml

View file

@ -0,0 +1,60 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/upgrade.cattle.io/plan_v1.json
# Upgrade plan for the Talos OS. It selects Talos nodes that are not yet on
# the target version (two hostnames are excluded), handles one node at a
# time, runs a talosctl health check as the prepare step, and then runs
# talosctl upgrade with a factory installer image built from the schematic ID
# found in the job pod's "extensions.talos.dev/schematic" annotation.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
  name: talos
spec:
  version: ${TALOS_VERSION}
  serviceAccountName: system-upgrade
  # Secret mounted into the job containers at the given path.
  secrets:
    - name: talos
      path: /var/run/secrets/talos.dev
      ignoreUpdates: true
  concurrency: 1
  exclusive: true
  nodeSelector:
    matchExpressions:
      - key: feature.node.kubernetes.io/system-os_release.ID
        operator: In
        values: ["talos"]
      # Skip nodes that already report the target version.
      - key: feature.node.kubernetes.io/system-os_release.VERSION_ID
        operator: NotIn
        values: ["${TALOS_VERSION}"]
      - key: kubernetes.io/hostname
        operator: NotIn
        values: ["gandalf-01", "shadowfax-01"]
      # - key: factory.talos.dev/schematic-id.part-0
      #   operator: In
      #   values: ["${TALOS_SCHEMATIC_ID:0:32}"]
      # - key: factory.talos.dev/schematic-id.part-1
      #   operator: In
      #   values: ["${TALOS_SCHEMATIC_ID:32}"]
  tolerations:
    - key: CriticalAddonsOnly
      operator: Exists
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  prepare: &prepare
    image: ghcr.io/siderolabs/talosctl:${TALOS_VERSION}
    envs:
      # IP of the node the job pod is running on.
      - name: NODE_IP
        valueFrom:
          fieldRef:
            fieldPath: status.hostIP
      # Schematic ID read from the job pod's own annotation.
      - name: TALOS_SCHEMATIC_ID
        valueFrom:
          fieldRef:
            fieldPath: metadata.annotations['extensions.talos.dev/schematic']
    args:
      - --nodes=$(NODE_IP)
      - health
      - --server=false
  # Same image and envs as the prepare step; only the arguments differ.
  upgrade:
    <<: *prepare
    args:
      - --nodes=$(NODE_IP)
      - upgrade
      # TALOS_SCHEMATIC_ID must use the Kubernetes $(VAR) form so it is
      # expanded from the container env at runtime. The curly-brace form is
      # consumed by Flux post-build substitution, which has no value for this
      # name and would render an installer image with no schematic ID.
      - --image=factory.talos.dev/installer/$(TALOS_SCHEMATIC_ID):$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION)
      - --wait=false