scale-to-one-node #269

Merged
jahanson merged 3 commits from scale-to-one-node into main 2024-05-07 13:37:02 -05:00
28 changed files with 134 additions and 80 deletions

View file

@ -87,7 +87,7 @@ tasks:
"containers": [ "containers": [
{ {
"name": "debug", "name": "debug",
"image": "ghcr.io/onedr0p/alpine:rolling", "image": "docker.io/library/alpine:3.19.1",
"command": ["/bin/bash"], "command": ["/bin/bash"],
"stdin": true, "stdin": true,
"stdinOnce": true, "stdinOnce": true,

View file

@ -39,7 +39,7 @@ spec:
metadata: metadata:
labels: labels:
app.kubernetes.io/name: crunchy-postgres app.kubernetes.io/name: crunchy-postgres
replicas: &replica 3 replicas: &replica 1
dataVolumeClaimSpec: dataVolumeClaimSpec:
storageClassName: openebs-hostpath storageClassName: openebs-hostpath
accessModes: accessModes:

View file

@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/name: dragonfly app.kubernetes.io/name: dragonfly
name: dragonfly name: dragonfly
spec: spec:
replicas: 2 replicas: 1
resources: resources:
requests: requests:
cpu: 500m cpu: 500m

View file

@ -23,7 +23,7 @@ containerRuntime:
localRedirectPolicy: true localRedirectPolicy: true
operator: operator:
rollOutPods: true replicas: 1
ipam: ipam:
mode: kubernetes mode: kubernetes
kubeProxyReplacement: true kubeProxyReplacement: true

View file

@ -24,7 +24,7 @@ spec:
uninstall: uninstall:
keepHistory: false keepHistory: false
values: values:
replicas: 2 replicas: 1
kind: Deployment kind: Deployment
deschedulerPolicyAPIVersion: descheduler/v1alpha2 deschedulerPolicyAPIVersion: descheduler/v1alpha2
deschedulerPolicy: deschedulerPolicy:

View file

@ -0,0 +1,3 @@
---
# Values consumed by the kubelet-csr-approver Helm chart.
# The regex is anchored on both ends, so it matches exactly "shadowfax".
# NOTE(review): presumably this is matched against the requesting node's
# name, and bypassDnsResolution skips the approver's DNS lookup check —
# confirm both against the chart documentation.
providerRegex: '^shadowfax$'
bypassDnsResolution: true

View file

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
# Flux HelmRelease that installs the kubelet-csr-approver chart (pinned to
# 1.1.0) from the "postfinance" HelmRepository in flux-system.
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
  name: kubelet-csr-approver
spec:
  interval: 30m
  chart:
    spec:
      chart: kubelet-csr-approver
      version: 1.1.0
      sourceRef:
        kind: HelmRepository
        name: postfinance
        namespace: flux-system
  # Retry a failed install up to three times.
  install:
    remediation:
      retries: 3
  # On a failed upgrade: clean up, then roll back (up to three retries).
  upgrade:
    cleanupOnFail: true
    remediation:
      strategy: rollback
      retries: 3
  # Chart values come from this ConfigMap plus the inline values below.
  valuesFrom:
    - kind: ConfigMap
      name: kubelet-csr-approver-helm-values
  values:
    metrics:
      enable: true
      serviceMonitor:
        enabled: true

View file

@ -0,0 +1,12 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
# Kustomize entry point: renders the HelmRelease and generates the ConfigMap
# that carries the chart values.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./helmrelease.yaml
# Packs ./helm-values.yaml into the ConfigMap under the key "values.yaml".
configMapGenerator:
  - name: kubelet-csr-approver-helm-values
    files:
      - values.yaml=./helm-values.yaml
# Extra transformer configuration is loaded from kustomizeconfig.yaml.
configurations:
  - kustomizeconfig.yaml

View file

@ -0,0 +1,7 @@
---
# Kustomize name-reference configuration: declares that the field
# spec/valuesFrom/name of a HelmRelease refers to a v1 ConfigMap by name.
# NOTE(review): presumably this lets kustomize rewrite that field to the
# generated (hash-suffixed) ConfigMap name — confirm against kustomize docs.
nameReference:
  - kind: ConfigMap
    version: v1
    fieldSpecs:
      - path: spec/valuesFrom/name
        kind: HelmRelease

View file

@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
# Flux Kustomization that applies the kubelet-csr-approver app directory
# from the "homelab" GitRepository into the kube-system namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # The anchor lets the app name be reused for the common label below.
  name: &app kubelet-csr-approver
  namespace: flux-system
spec:
  targetNamespace: kube-system
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  path: ./kubernetes/apps/kube-system/kubelet-csr-approver/app
  prune: false  # these resources should never be deleted
  sourceRef:
    kind: GitRepository
    name: homelab
  wait: false
  interval: 30m
  retryInterval: 1m
  timeout: 5m

View file

@ -10,6 +10,7 @@ resources:
- ./descheduler/ks.yaml - ./descheduler/ks.yaml
- ./dnsimple-webhook-rbac.yaml - ./dnsimple-webhook-rbac.yaml
- ./fstrim/ks.yaml - ./fstrim/ks.yaml
- ./kubelet-csr-approver/ks.yaml
- ./metrics-server/ks.yaml - ./metrics-server/ks.yaml
- ./multus/ks.yaml - ./multus/ks.yaml
- ./intel-device-plugin/ks.yaml - ./intel-device-plugin/ks.yaml

View file

@ -56,7 +56,7 @@ spec:
serviceMonitor: serviceMonitor:
enabled: true enabled: true
admissionController: admissionController:
replicas: 3 replicas: 1
serviceMonitor: serviceMonitor:
enabled: true enabled: true
rbac: rbac:

View file

@ -42,7 +42,7 @@ spec:
metadata: metadata:
labels: labels:
app.kubernetes.io/name: pgo-${APP} app.kubernetes.io/name: pgo-${APP}
replicas: 2 replicas: 1
dataVolumeClaimSpec: dataVolumeClaimSpec:
storageClassName: openebs-hostpath storageClassName: openebs-hostpath
accessModes: accessModes:

View file

@ -28,7 +28,7 @@ spec:
values: values:
controllers: controllers:
cloudflared: cloudflared:
replicas: 2 replicas: 1
strategy: RollingUpdate strategy: RollingUpdate
annotations: annotations:
reloader.stakater.com/auto: "true" reloader.stakater.com/auto: "true"

View file

@ -22,7 +22,7 @@ spec:
valuesKey: MAXMIND_LICENSE_KEY valuesKey: MAXMIND_LICENSE_KEY
values: values:
controller: controller:
replicaCount: 2 replicaCount: 1
updateStrategy: updateStrategy:
type: RollingUpdate type: RollingUpdate
allowSnippetAnnotations: true allowSnippetAnnotations: true

View file

@ -20,7 +20,7 @@ spec:
fullnameOverride: nginx-internal fullnameOverride: nginx-internal
controller: controller:
replicaCount: 3 replicaCount: 1
updateStrategy: updateStrategy:
type: RollingUpdate type: RollingUpdate

View file

@ -29,7 +29,7 @@ spec:
- name: loki - name: loki
namespace: observability namespace: observability
values: values:
replicas: 2 replicas: 1
envFromSecret: grafana-secret envFromSecret: grafana-secret
dashboardProviders: dashboardProviders:
dashboardproviders.yaml: dashboardproviders.yaml:

View file

@ -45,7 +45,7 @@ spec:
- hosts: - hosts:
- *host - *host
alertmanagerSpec: alertmanagerSpec:
replicas: 2 replicas: 1
useExistingSecret: true useExistingSecret: true
configSecret: alertmanager-secret configSecret: alertmanager-secret
storage: storage:
@ -117,7 +117,7 @@ spec:
podMetadata: podMetadata:
annotations: annotations:
secret.reloader.stakater.com/reload: &secret thanos-objstore-config secret.reloader.stakater.com/reload: &secret thanos-objstore-config
replicas: 2 replicas: 1
replicaExternalLabelName: __replica__ replicaExternalLabelName: __replica__
scrapeInterval: 1m # Must match interval in Grafana Helm chart scrapeInterval: 1m # Must match interval in Grafana Helm chart
ruleSelectorNilUsesHelmValues: false ruleSelectorNilUsesHelmValues: false

View file

@ -111,12 +111,12 @@ spec:
analytics: analytics:
reporting_enabled: false reporting_enabled: false
backend: backend:
replicas: 2 replicas: 1
persistence: persistence:
size: 20Gi size: 20Gi
storageClass: openebs-hostpath storageClass: openebs-hostpath
gateway: gateway:
replicas: 2 replicas: 1
image: image:
registry: ghcr.io registry: ghcr.io
ingress: ingress:
@ -130,9 +130,9 @@ spec:
tls: tls:
- hosts: [*host] - hosts: [*host]
read: read:
replicas: 2 replicas: 1
write: write:
replicas: 2 replicas: 1
persistence: persistence:
size: 20Gi size: 20Gi
storageClass: openebs-hostpath storageClass: openebs-hostpath

View file

@ -75,11 +75,11 @@ spec:
storageClass: openebs-hostpath storageClass: openebs-hostpath
size: 10Gi size: 10Gi
query: query:
replicas: 2 replicas: 1
extraArgs: ["--alert.query-url=https://thanos.jahanson.tech"] extraArgs: ["--alert.query-url=https://thanos.jahanson.tech"]
queryFrontend: queryFrontend:
enabled: true enabled: true
replicas: 2 replicas: 1
extraEnv: &extraEnv extraEnv: &extraEnv
- name: THANOS_CACHE_CONFIG - name: THANOS_CACHE_CONFIG
valueFrom: valueFrom:
@ -98,7 +98,7 @@ spec:
configmap.reloader.stakater.com/reload: *configMap configmap.reloader.stakater.com/reload: *configMap
rule: rule:
enabled: true enabled: true
replicas: 2 replicas: 1
extraArgs: ["--web.prefix-header=X-Forwarded-Prefix"] extraArgs: ["--web.prefix-header=X-Forwarded-Prefix"]
alertmanagersConfig: alertmanagersConfig:
value: |- value: |-
@ -120,7 +120,7 @@ spec:
severity: critical severity: critical
persistence: *persistence persistence: *persistence
storeGateway: storeGateway:
replicas: 2 replicas: 1
extraEnv: *extraEnv extraEnv: *extraEnv
extraArgs: ["--index-cache.config=$(THANOS_CACHE_CONFIG)"] extraArgs: ["--index-cache.config=$(THANOS_CACHE_CONFIG)"]
persistence: *persistence persistence: *persistence

View file

@ -26,7 +26,7 @@ spec:
values: values:
controllers: controllers:
vector-aggregator: vector-aggregator:
replicas: 2 replicas: 1
strategy: RollingUpdate strategy: RollingUpdate
annotations: annotations:
reloader.stakater.com/auto: "true" reloader.stakater.com/auto: "true"

View file

@ -49,6 +49,7 @@ spec:
bdev_enable_discard = true bdev_enable_discard = true
bdev_async_discard = true bdev_async_discard = true
osd_class_update_on_start = false osd_class_update_on_start = false
osd_pool_default_size = 1
cephClusterSpec: cephClusterSpec:
network: network:
provider: host provider: host
@ -63,20 +64,7 @@ spec:
storage: storage:
useAllNodes: true useAllNodes: true
useAllDevices: false useAllDevices: false
deviceFilter: "xvdb|nvme1n1|nvme0n1" deviceFilter: "nvme2n1"
placement:
mgr: &placement
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: Exists
tolerations: # allow mgr to run on control plane nodes
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
mon: *placement
resources: resources:
mgr: mgr:
requests: requests:
@ -103,8 +91,6 @@ spec:
- name: ceph-blockpool - name: ceph-blockpool
spec: spec:
failureDomain: host failureDomain: host
replicated:
size: 3
storageClass: storageClass:
enabled: true enabled: true
name: ceph-block name: ceph-block
@ -130,12 +116,8 @@ spec:
- name: ceph-filesystem - name: ceph-filesystem
spec: spec:
metadataPool: metadataPool:
replicated:
size: 3
dataPools: dataPools:
- failureDomain: host - failureDomain: host
replicated:
size: 3
name: data0 name: data0
metadataServer: metadataServer:
activeCount: 1 activeCount: 1
@ -171,13 +153,8 @@ spec:
spec: spec:
metadataPool: metadataPool:
failureDomain: host failureDomain: host
replicated:
size: 3
dataPool: dataPool:
failureDomain: host failureDomain: host
erasureCoded:
dataChunks: 2
codingChunks: 1
preservePoolsOnDelete: true preservePoolsOnDelete: true
gateway: gateway:
port: 80 port: 80

View file

@ -18,7 +18,7 @@ spec:
namespace: flux-system namespace: flux-system
values: values:
installCRDs: true installCRDs: true
replicaCount: 3 replicaCount: 1
leaderElect: true leaderElect: true
serviceMonitor: serviceMonitor:
enabled: true enabled: true

View file

@ -10,8 +10,8 @@ helmDefaults:
repositories: repositories:
- name: cilium - name: cilium
url: https://helm.cilium.io url: https://helm.cilium.io
- name: nvdp - name: postfinance
url: https://nvidia.github.io/k8s-device-plugin url: https://postfinance.github.io/kubelet-csr-approver
releases: releases:
- name: cilium - name: cilium
@ -20,12 +20,12 @@ releases:
version: 1.15.4 version: 1.15.4
values: ["../../../apps/kube-system/cilium/app/resources/values.yml"] values: ["../../../apps/kube-system/cilium/app/resources/values.yml"]
wait: true wait: true
- name: nvidia-device-plugin - name: kubelet-csr-approver
namespace: kube-system namespace: kube-system
chart: nvdp/nvidia-device-plugin chart: postfinance/kubelet-csr-approver
version: 0.14.5 version: 1.1.0
values: ["../../../apps/kube-system/nvidia-device-plugin/app/resources/values.yml"] values: ["../../../apps/kube-system/kubelet-csr-approver/app/helm-values.yaml"]
wait: true needs: ["cilium"]
- name: spegel - name: spegel
namespace: kube-system namespace: kube-system
chart: oci://ghcr.io/spegel-org/helm-charts/spegel chart: oci://ghcr.io/spegel-org/helm-charts/spegel

View file

@ -6,9 +6,6 @@ talosVersion: v1.7.1
kubernetesVersion: 1.28.4 kubernetesVersion: 1.28.4
endpoint: "https://10.1.1.57:6443" endpoint: "https://10.1.1.57:6443"
cniConfig:
name: none
additionalApiServerCertSans: additionalApiServerCertSans:
- 10.1.1.57 - 10.1.1.57
@ -21,10 +18,12 @@ nodes:
ipAddress: 10.1.1.61 ipAddress: 10.1.1.61
controlPlane: true controlPlane: true
installDiskSelector: installDiskSelector:
busPath: /dev/nvme0n1 busPath: /pci0000:20/0000:20:01.2/0000:2d:00.0/nvme/nvme1/nvme1n1
networkInterfaces: networkInterfaces:
- interface: eth0 - interface: enp37s0f1
dhcp: true dhcp: true
- interface: enp37s0f0
dhcp: false
kernelModules: kernelModules:
- name: nvidia - name: nvidia
- name: nvidia_uvm - name: nvidia_uvm
@ -55,7 +54,7 @@ controlPlane:
machine: machine:
network: network:
nameservers: nameservers:
- 10.1.1.11 - 10.1.1.1
# Configure NTP # Configure NTP
- |- - |-
@ -79,6 +78,9 @@ controlPlane:
allowSchedulingOnMasters: true allowSchedulingOnMasters: true
proxy: proxy:
disabled: true disabled: true
network:
cni:
name: none
# ETCD configuration # ETCD configuration
- |- - |-

View file

@ -34,7 +34,7 @@ spec:
metadata: metadata:
labels: labels:
app.kubernetes.io/name: pgo-${APP} app.kubernetes.io/name: pgo-${APP}
replicas: 2 replicas: 1
dataVolumeClaimSpec: dataVolumeClaimSpec:
storageClassName: openebs-hostpath storageClassName: openebs-hostpath
accessModes: accessModes:

View file

@ -3,19 +3,19 @@ apiVersion: v1
kind: Pod kind: Pod
metadata: metadata:
name: disk-wipe-one name: disk-wipe-one
namespace: rook-ceph namespace: kube-system
spec: spec:
restartPolicy: Never restartPolicy: Never
nodeName: talos-ltk-p4a nodeName: shadowfax
containers: containers:
- name: disk-wipe - name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638 image: docker.io/library/alpine:3.19.1
securityContext: securityContext:
privileged: true privileged: true
resources: {} resources: {}
env: env:
- name: CEPH_DISK - name: CEPH_DISK
value: "/dev/xvdb" value: "/dev/nvme2n1"
command: command:
[ [
"/bin/sh", "/bin/sh",
@ -34,4 +34,3 @@ spec:
- name: host-var - name: host-var
hostPath: hostPath:
path: /var path: /var

View file

@ -9,7 +9,7 @@ spec:
nodeName: talos-fki-fmf nodeName: talos-fki-fmf
containers: containers:
- name: disk-wipe - name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638 image: docker.io/library/alpine:3.19.1
securityContext: securityContext:
privileged: true privileged: true
resources: {} resources: {}
@ -46,7 +46,7 @@ spec:
nodeName: talos-xuc-f2e nodeName: talos-xuc-f2e
containers: containers:
- name: disk-wipe - name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638 image: docker.io/library/alpine:3.19.1
securityContext: securityContext:
privileged: true privileged: true
resources: {} resources: {}
@ -83,7 +83,7 @@ spec:
nodeName: talos-opy-6ij nodeName: talos-opy-6ij
containers: containers:
- name: disk-wipe - name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638 image: docker.io/library/alpine:3.19.1
securityContext: securityContext:
privileged: true privileged: true
resources: {} resources: {}