Merge pull request 'scale-to-one-node' (#269) from scale-to-one-node into main

Reviewed-on: jahanson/homelab#269
This commit is contained in:
Joseph Hanson 2024-05-07 18:37:01 +00:00
commit 1506773327
28 changed files with 134 additions and 80 deletions

View file

@ -87,7 +87,7 @@ tasks:
"containers": [
{
"name": "debug",
"image": "ghcr.io/onedr0p/alpine:rolling",
"image": "docker.io/library/alpine:3.19.1",
"command": ["/bin/bash"],
"stdin": true,
"stdinOnce": true,

View file

@ -39,7 +39,7 @@ spec:
metadata:
labels:
app.kubernetes.io/name: crunchy-postgres
replicas: &replica 3
replicas: &replica 1
dataVolumeClaimSpec:
storageClassName: openebs-hostpath
accessModes:
@ -161,4 +161,4 @@ spec:
labelSelector:
matchLabels:
postgres-operator.crunchydata.com/cluster: *name
postgres-operator.crunchydata.com/role: "pgbouncer"
postgres-operator.crunchydata.com/role: "pgbouncer"

View file

@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/name: dragonfly
name: dragonfly
spec:
replicas: 2
replicas: 1
resources:
requests:
cpu: 500m
@ -26,4 +26,4 @@ spec:
# - ReadWriteOnce
# resources:
# requests:
# storage: 2Gi
# storage: 2Gi

View file

@ -23,7 +23,7 @@ containerRuntime:
localRedirectPolicy: true
operator:
rollOutPods: true
replicas: 1
ipam:
mode: kubernetes
kubeProxyReplacement: true
@ -58,4 +58,4 @@ securityContext:
cleanCiliumState:
- NET_ADMIN
- SYS_ADMIN
- SYS_RESOURCE
- SYS_RESOURCE

View file

@ -24,7 +24,7 @@ spec:
uninstall:
keepHistory: false
values:
replicas: 2
replicas: 1
kind: Deployment
deschedulerPolicyAPIVersion: descheduler/v1alpha2
deschedulerPolicy:
@ -74,4 +74,4 @@ spec:
serviceMonitor:
enabled: true
leaderElection:
enabled: true
enabled: true

View file

@ -0,0 +1,3 @@
---
# Helm values for the kubelet-csr-approver chart. This file is consumed both by
# the kustomize configMapGenerator and by the bootstrap helmfile.

# Anchored regex: matches exactly the name `shadowfax` and nothing else.
# NOTE(review): presumably the node/hostname filter applied to incoming
# kubelet CSRs; confirm against the chart documentation.
providerRegex: '^shadowfax$'
# NOTE(review): presumably disables the approver's DNS lookup of the requested
# hostname; confirm against the chart documentation.
bypassDnsResolution: true

View file

@ -0,0 +1,32 @@
---
# yaml-language-server: $schema=https://ks.hsn.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
# Flux HelmRelease installing the kubelet-csr-approver chart from the
# `postfinance` HelmRepository in flux-system.
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
  name: kubelet-csr-approver
spec:
  interval: 30m
  chart:
    spec:
      chart: kubelet-csr-approver
      version: 1.1.0
      sourceRef:
        kind: HelmRepository
        name: postfinance
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      # Roll back to the previous release when an upgrade fails.
      strategy: rollback
      retries: 3
  # Base values come from the ConfigMap generated by kustomize; the
  # name is rewritten to the hash-suffixed ConfigMap at build time.
  valuesFrom:
    - kind: ConfigMap
      name: kubelet-csr-approver-helm-values
  # Inline values are merged on top of the valuesFrom sources.
  values:
    metrics:
      # NOTE(review): key is `enable` here but `enabled` below; confirm both
      # spellings against the chart's values.yaml.
      enable: true
      serviceMonitor:
        enabled: true

View file

@ -0,0 +1,12 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization.json
# Kustomize entry point for the kubelet-csr-approver app: renders the
# HelmRelease and generates the ConfigMap holding its Helm values.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./helmrelease.yaml
configMapGenerator:
  - name: kubelet-csr-approver-helm-values
    files:
      # Store ./helm-values.yaml under the ConfigMap key `values.yaml`.
      - values.yaml=./helm-values.yaml
# Extra transformer configuration so the generated (hash-suffixed) ConfigMap
# name is propagated into the HelmRelease.
configurations:
  - kustomizeconfig.yaml

View file

@ -0,0 +1,7 @@
---
# Kustomize name-reference configuration: declares that the field
# spec.valuesFrom[].name of a HelmRelease refers to a v1 ConfigMap, so
# kustomize rewrites it when the ConfigMap's name changes (e.g. hash suffix).
nameReference:
  - kind: ConfigMap
    version: v1
    fieldSpecs:
      - path: spec/valuesFrom/name
        kind: HelmRelease

View file

@ -0,0 +1,21 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json
# Flux Kustomization that reconciles the kubelet-csr-approver app directory
# from the `homelab` GitRepository into the kube-system namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # Anchor reused below for the common app label.
  name: &app kubelet-csr-approver
  namespace: flux-system
spec:
  targetNamespace: kube-system
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
  path: ./kubernetes/apps/kube-system/kubelet-csr-approver/app
  prune: false  # these resources should never be deleted
  sourceRef:
    kind: GitRepository
    name: homelab
  wait: false
  interval: 30m
  retryInterval: 1m
  timeout: 5m

View file

@ -10,6 +10,7 @@ resources:
- ./descheduler/ks.yaml
- ./dnsimple-webhook-rbac.yaml
- ./fstrim/ks.yaml
- ./kubelet-csr-approver/ks.yaml
- ./metrics-server/ks.yaml
- ./multus/ks.yaml
- ./intel-device-plugin/ks.yaml

View file

@ -56,7 +56,7 @@ spec:
serviceMonitor:
enabled: true
admissionController:
replicas: 3
replicas: 1
serviceMonitor:
enabled: true
rbac:

View file

@ -42,7 +42,7 @@ spec:
metadata:
labels:
app.kubernetes.io/name: pgo-${APP}
replicas: 2
replicas: 1
dataVolumeClaimSpec:
storageClassName: openebs-hostpath
accessModes:

View file

@ -28,7 +28,7 @@ spec:
values:
controllers:
cloudflared:
replicas: 2
replicas: 1
strategy: RollingUpdate
annotations:
reloader.stakater.com/auto: "true"

View file

@ -22,7 +22,7 @@ spec:
valuesKey: MAXMIND_LICENSE_KEY
values:
controller:
replicaCount: 2
replicaCount: 1
updateStrategy:
type: RollingUpdate
allowSnippetAnnotations: true

View file

@ -20,7 +20,7 @@ spec:
fullnameOverride: nginx-internal
controller:
replicaCount: 3
replicaCount: 1
updateStrategy:
type: RollingUpdate

View file

@ -29,7 +29,7 @@ spec:
- name: loki
namespace: observability
values:
replicas: 2
replicas: 1
envFromSecret: grafana-secret
dashboardProviders:
dashboardproviders.yaml:
@ -398,4 +398,4 @@ spec:
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: grafana
app.kubernetes.io/name: grafana

View file

@ -45,7 +45,7 @@ spec:
- hosts:
- *host
alertmanagerSpec:
replicas: 2
replicas: 1
useExistingSecret: true
configSecret: alertmanager-secret
storage:
@ -117,7 +117,7 @@ spec:
podMetadata:
annotations:
secret.reloader.stakater.com/reload: &secret thanos-objstore-config
replicas: 2
replicas: 1
replicaExternalLabelName: __replica__
scrapeInterval: 1m # Must match interval in Grafana Helm chart
ruleSelectorNilUsesHelmValues: false
@ -194,4 +194,4 @@ spec:
grafana_folder: Kubernetes
multicluster:
etcd:
enabled: true
enabled: true

View file

@ -111,12 +111,12 @@ spec:
analytics:
reporting_enabled: false
backend:
replicas: 2
replicas: 1
persistence:
size: 20Gi
storageClass: openebs-hostpath
gateway:
replicas: 2
replicas: 1
image:
registry: ghcr.io
ingress:
@ -130,9 +130,9 @@ spec:
tls:
- hosts: [*host]
read:
replicas: 2
replicas: 1
write:
replicas: 2
replicas: 1
persistence:
size: 20Gi
storageClass: openebs-hostpath
@ -145,4 +145,4 @@ spec:
lokiCanary:
enabled: false
test:
enabled: false
enabled: false

View file

@ -75,11 +75,11 @@ spec:
storageClass: openebs-hostpath
size: 10Gi
query:
replicas: 2
replicas: 1
extraArgs: ["--alert.query-url=https://thanos.jahanson.tech"]
queryFrontend:
enabled: true
replicas: 2
replicas: 1
extraEnv: &extraEnv
- name: THANOS_CACHE_CONFIG
valueFrom:
@ -98,7 +98,7 @@ spec:
configmap.reloader.stakater.com/reload: *configMap
rule:
enabled: true
replicas: 2
replicas: 1
extraArgs: ["--web.prefix-header=X-Forwarded-Prefix"]
alertmanagersConfig:
value: |-
@ -120,8 +120,8 @@ spec:
severity: critical
persistence: *persistence
storeGateway:
replicas: 2
replicas: 1
extraEnv: *extraEnv
extraArgs: ["--index-cache.config=$(THANOS_CACHE_CONFIG)"]
persistence: *persistence
podAnnotations: *podAnnotations
podAnnotations: *podAnnotations

View file

@ -26,7 +26,7 @@ spec:
values:
controllers:
vector-aggregator:
replicas: 2
replicas: 1
strategy: RollingUpdate
annotations:
reloader.stakater.com/auto: "true"
@ -88,4 +88,4 @@ spec:
geoip:
type: emptyDir
globalMounts:
- path: /usr/share/GeoIP
- path: /usr/share/GeoIP

View file

@ -49,6 +49,7 @@ spec:
bdev_enable_discard = true
bdev_async_discard = true
osd_class_update_on_start = false
osd_pool_default_size = 1
cephClusterSpec:
network:
provider: host
@ -63,20 +64,7 @@ spec:
storage:
useAllNodes: true
useAllDevices: false
deviceFilter: "xvdb|nvme1n1|nvme0n1"
placement:
mgr: &placement
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: Exists
tolerations: # allow mgr to run on control plane nodes
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
mon: *placement
deviceFilter: "nvme2n1"
resources:
mgr:
requests:
@ -103,8 +91,6 @@ spec:
- name: ceph-blockpool
spec:
failureDomain: host
replicated:
size: 3
storageClass:
enabled: true
name: ceph-block
@ -130,12 +116,8 @@ spec:
- name: ceph-filesystem
spec:
metadataPool:
replicated:
size: 3
dataPools:
- failureDomain: host
replicated:
size: 3
name: data0
metadataServer:
activeCount: 1
@ -171,13 +153,8 @@ spec:
spec:
metadataPool:
failureDomain: host
replicated:
size: 3
dataPool:
failureDomain: host
erasureCoded:
dataChunks: 2
codingChunks: 1
preservePoolsOnDelete: true
gateway:
port: 80

View file

@ -18,7 +18,7 @@ spec:
namespace: flux-system
values:
installCRDs: true
replicaCount: 3
replicaCount: 1
leaderElect: true
serviceMonitor:
enabled: true

View file

@ -10,8 +10,8 @@ helmDefaults:
repositories:
- name: cilium
url: https://helm.cilium.io
- name: nvdp
url: https://nvidia.github.io/k8s-device-plugin
- name: postfinance
url: https://postfinance.github.io/kubelet-csr-approver
releases:
- name: cilium
@ -20,15 +20,15 @@ releases:
version: 1.15.4
values: ["../../../apps/kube-system/cilium/app/resources/values.yml"]
wait: true
- name: nvidia-device-plugin
- name: kubelet-csr-approver
namespace: kube-system
chart: nvdp/nvidia-device-plugin
version: 0.14.5
values: ["../../../apps/kube-system/nvidia-device-plugin/app/resources/values.yml"]
wait: true
chart: postfinance/kubelet-csr-approver
version: 1.1.0
values: ["../../../apps/kube-system/kubelet-csr-approver/app/helm-values.yaml"]
needs: ["cilium"]
- name: spegel
namespace: kube-system
chart: oci://ghcr.io/spegel-org/helm-charts/spegel
version: v0.0.22
values: ["../../../apps/kube-system/spegel/app/resources/values.yml"]
wait: true
wait: true

View file

@ -6,9 +6,6 @@ talosVersion: v1.7.1
kubernetesVersion: 1.28.4
endpoint: "https://10.1.1.57:6443"
cniConfig:
name: none
additionalApiServerCertSans:
- 10.1.1.57
@ -21,10 +18,12 @@ nodes:
ipAddress: 10.1.1.61
controlPlane: true
installDiskSelector:
busPath: /dev/nvme0n1
busPath: /pci0000:20/0000:20:01.2/0000:2d:00.0/nvme/nvme1/nvme1n1
networkInterfaces:
- interface: eth0
- interface: enp37s0f1
dhcp: true
- interface: enp37s0f0
dhcp: false
kernelModules:
- name: nvidia
- name: nvidia_uvm
@ -55,7 +54,7 @@ controlPlane:
machine:
network:
nameservers:
- 10.1.1.11
- 10.1.1.1
# Configure NTP
- |-
@ -79,6 +78,9 @@ controlPlane:
allowSchedulingOnMasters: true
proxy:
disabled: true
network:
cni:
name: none
# ETCD configuration
- |-

View file

@ -34,7 +34,7 @@ spec:
metadata:
labels:
app.kubernetes.io/name: pgo-${APP}
replicas: 2
replicas: 1
dataVolumeClaimSpec:
storageClassName: openebs-hostpath
accessModes:

View file

@ -3,19 +3,19 @@ apiVersion: v1
kind: Pod
metadata:
name: disk-wipe-one
namespace: rook-ceph
namespace: kube-system
spec:
restartPolicy: Never
nodeName: talos-ltk-p4a
nodeName: shadowfax
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
image: docker.io/library/alpine:3.19.1
securityContext:
privileged: true
resources: {}
env:
- name: CEPH_DISK
value: "/dev/xvdb"
value: "/dev/nvme2n1"
command:
[
"/bin/sh",
@ -34,4 +34,3 @@ spec:
- name: host-var
hostPath:
path: /var

View file

@ -9,7 +9,7 @@ spec:
nodeName: talos-fki-fmf
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
image: docker.io/library/alpine:3.19.1
securityContext:
privileged: true
resources: {}
@ -46,7 +46,7 @@ spec:
nodeName: talos-xuc-f2e
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
image: docker.io/library/alpine:3.19.1
securityContext:
privileged: true
resources: {}
@ -83,7 +83,7 @@ spec:
nodeName: talos-opy-6ij
containers:
- name: disk-wipe
image: ghcr.io/onedr0p/alpine:3.19.1@sha256:3fbc581cb0fe29830376161ae026e2a765dcc11e1747477fe9ebf155720b8638
image: docker.io/library/alpine:3.19.1
securityContext:
privileged: true
resources: {}