---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Flux HelmRelease that deploys Ollama through the bjw-s app-template chart.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: &app ollama
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 3.5.1
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  # Retry a failed install up to three times.
  install:
    remediation:
      retries: 3
  # Retry a failed upgrade up to three times, rolling back between attempts.
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
      strategy: rollback
  values:
    controllers:
      ollama:
        annotations:
          reloader.stakater.com/auto: "true"
        pod:
          # Schedule only onto nodes labelled as having an NVIDIA GPU and run
          # the pod under the "nvidia" RuntimeClass.
          nodeSelector:
            nvidia.com/gpu.present: "true"
          runtimeClassName: nvidia
        containers:
          app:
            image:
              repository: docker.io/ollama/ollama
              tag: 0.3.13
            env:
              - name: OLLAMA_HOST
                value: 0.0.0.0
              - name: OLLAMA_ORIGINS
                value: "*"
              # Anchored so the "models" volume under persistence is mounted
              # at exactly this path.
              - name: OLLAMA_MODELS
                value: &modelPath "/models"
              - name: OLLAMA_KEEP_ALIVE
                value: "24h"
            resources:
              requests:
                nvidia.com/gpu: 1  # requesting 1 GPU
                cpu: 500m
                memory: 2Gi
              limits:
                memory: 16Gi
                # Kept equal to the request above; Kubernetes requires GPU
                # requests and limits to match when both are specified.
                nvidia.com/gpu: 1
    service:
      app:
        controller: ollama
        ports:
          http:
            port: 11434
    ingress:
      app:
        enabled: true
        className: internal-nginx
        hosts:
          # Anchored so the TLS section below reuses the same hostname.
          - host: &host "{{ .Release.Name }}.jahanson.tech"
            paths:
              - path: /
                service:
                  identifier: app
                  port: http
        tls:
          - hosts:
              - *host
    persistence:
      # Existing claim mounted only into the "app" container of the "ollama"
      # controller, at the OLLAMA_MODELS path.
      models:
        enabled: true
        existingClaim: ollama-models
        advancedMounts:
          ollama:
            app:
              - path: *modelPath
      # Existing claim mounted at /root/.ollama via globalMounts.
      config:
        enabled: true
        existingClaim: ollama
        globalMounts:
          - path: /root/.ollama