diff --git a/kubernetes/apps/ai/kustomization.yaml b/kubernetes/apps/ai/kustomization.yaml
new file mode 100644
index 00000000..2228a90e
--- /dev/null
+++ b/kubernetes/apps/ai/kustomization.yaml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://json.schemastore.org/kustomization
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  # Pre Flux-Kustomizations
+  - ./namespace.yaml
+  # Flux-Kustomizations
+  - ./ollama/ks.yaml
diff --git a/kubernetes/apps/ai/namespace.yaml b/kubernetes/apps/ai/namespace.yaml
new file mode 100644
index 00000000..d19b0d7b
--- /dev/null
+++ b/kubernetes/apps/ai/namespace.yaml
@@ -0,0 +1,7 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ai
+  labels:
+    kustomize.toolkit.fluxcd.io/prune: disabled
diff --git a/kubernetes/apps/ai/ollama/app/helmrelease.yaml b/kubernetes/apps/ai/ollama/app/helmrelease.yaml
new file mode 100644
index 00000000..1ec9396c
--- /dev/null
+++ b/kubernetes/apps/ai/ollama/app/helmrelease.yaml
@@ -0,0 +1,88 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: &app ollama
+spec:
+  interval: 30m
+  chart:
+    spec:
+      chart: app-template
+      version: 3.2.1
+      sourceRef:
+        kind: HelmRepository
+        name: bjw-s
+        namespace: flux-system
+  install:
+    remediation:
+      retries: 3
+  upgrade:
+    cleanupOnFail: true
+    remediation:
+      retries: 3
+      strategy: rollback
+  values:
+    controllers:
+      ollama:
+        annotations:
+          reloader.stakater.com/auto: "true"
+        pod:
+          nodeSelector:
+            nvidia.com/gpu.present: "true"
+          runtimeClassName: nvidia
+        containers:
+          app:
+            image:
+              repository: docker.io/ollama/ollama
+              tag: 0.2.5
+            env:
+              - name: OLLAMA_HOST
+                value: "0.0.0.0"
+              - name: OLLAMA_ORIGINS
+                value: "*"
+              - name: OLLAMA_MODELS
+                value: &modelPath "/models"
+            resources:
+              requests:
+                nvidia.com/gpu: 1 # requesting 1 GPU
+                cpu: 500m
+                memory: 2Gi
+              limits:
+                # Extended resources (GPUs) must be declared in limits and
+                # cannot be overcommitted; request and limit must match.
+                nvidia.com/gpu: 1
+                memory: 2Gi
+    service:
+      app:
+        controller: ollama
+        ports:
+          http:
+            port: 11434
+    ingress:
+      app:
+        enabled: true
+        className: internal-nginx
+        hosts:
+          - host: &host "{{ .Release.Name }}.jahanson.tech"
+            paths:
+              - path: /
+                service:
+                  identifier: app
+                  port: http
+        tls:
+          - hosts:
+              - *host
+    persistence:
+      models:
+        enabled: true
+        existingClaim: ollama-models
+        advancedMounts:
+          # Keys must match the controller (ollama) and container (app) names.
+          ollama:
+            app:
+              - path: *modelPath
+      config:
+        enabled: true
+        existingClaim: ollama
+        globalMounts:
+          - path: /root/.ollama
diff --git a/kubernetes/apps/ai/ollama/app/kustomization.yaml b/kubernetes/apps/ai/ollama/app/kustomization.yaml
new file mode 100644
index 00000000..5ca502cf
--- /dev/null
+++ b/kubernetes/apps/ai/ollama/app/kustomization.yaml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://json.schemastore.org/kustomization
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - ./helmrelease.yaml
+  - ./pvc.yaml
+  - ../../../../templates/volsync
diff --git a/kubernetes/apps/ai/ollama/app/pvc.yaml b/kubernetes/apps/ai/ollama/app/pvc.yaml
new file mode 100644
index 00000000..6b2734e2
--- /dev/null
+++ b/kubernetes/apps/ai/ollama/app/pvc.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 500Gi
+  storageClassName: openebs-hostpath
diff --git a/kubernetes/apps/ai/ollama/ks.yaml b/kubernetes/apps/ai/ollama/ks.yaml
new file mode 100644
index 00000000..e2adb9aa
--- /dev/null
+++ b/kubernetes/apps/ai/ollama/ks.yaml
@@ -0,0 +1,30 @@
+---
+# yaml-language-server: $schema=https://ks.hsn.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: &app ollama
+  namespace: flux-system
+spec:
+  targetNamespace: ai
+  commonMetadata:
+    labels:
+      app.kubernetes.io/name: *app
+  dependsOn:
+    - name: nvidia-device-plugin
+    - name: node-feature-discovery
+    - name: volsync
+    - name: openebs
+  path: ./kubernetes/apps/ai/ollama/app
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: homelab
+  wait: false
+  interval: 30m
+  retryInterval: 1m
+  timeout: 5m
+  postBuild:
+    substitute:
+      APP: *app
+      VOLSYNC_CAPACITY: 5Gi