# Local LLM stack: Ollama for chat/embeddings, OpenedAI Speech for TTS,
# faster-whisper-server for STT, and an nginx gateway in front of all three.
services:
  # LLM inference server; pulled models persist in the named "ollama" volume.
  ollama:
    image: ollama/ollama
    volumes:
      - ollama:/root/.ollama
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      # Healthy once the CLI runs and the API answers "ollama ps".
      test: ollama --version && ollama ps || exit 1
      interval: 60s
      retries: 5
      start_period: 20s
      timeout: 10s

  # OpenAI-compatible text-to-speech server, built from the local Dockerfile.
  openedai-speech:
    build:
      dockerfile: src/tts/Dockerfile
    environment:
      - TTS_HOME=voices
    volumes:
      - voices:/app/voices
      - speech-config:/app/config
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: curl --fail http://localhost:8000 || exit 1
      interval: 60s
      retries: 5
      start_period: 10s
      timeout: 10s

  # One-shot job: pulls the listed models once ollama reports healthy.
  llm_provision:
    build:
      dockerfile: src/llm_provision/Dockerfile
    environment:
      - MODELS=qwen2.5:latest,qwen2.5-coder:32b,nomic-embed-text:latest
    restart: "no"  # quoted: bare `no` is parsed as a YAML boolean, not a string
    depends_on:
      ollama:
        condition: service_healthy
        restart: true
    links:
      - ollama

  # One-shot image build for the aichat client.
  aichat-build:
    build:
      dockerfile: src/aichat/Dockerfile
    restart: "no"

  # Speech-to-text (Whisper large-v3) with CUDA; Hugging Face model downloads
  # are cached in the "hf-hub-cache" volume.
  faster-whisper-server:
    image: fedirz/faster-whisper-server:latest-cuda
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-large-v3
    volumes:
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      # TCP probe of port 8000 via bash's /dev/tcp (no curl needed in the image).
      test: timeout 10s bash -c ':> /dev/tcp/127.0.0.1/8000' || exit 1
      interval: 30s
      timeout: 15s
      retries: 3

  # Gateway: the nginx image renders /etc/nginx/nginx.conf from the mounted
  # template via envsubst (injecting API_KEY) and exposes the backends.
  nginx:
    image: nginx
    volumes:
      - ./src/nginx/nginx.conf:/etc/nginx/templates/nginx.conf.template
    environment:
      - NGINX_ENVSUBST_OUTPUT_DIR=/etc/nginx
      - API_KEY=${LLM_API_KEY}
    depends_on:
      - openedai-speech
      - faster-whisper-server
      - ollama
    links:
      - ollama
      - faster-whisper-server
      - openedai-speech
    ports:
      - "11434:11434"
      - "8000:8000"
      - "8001:8001"
    restart: unless-stopped

volumes:
  ollama:
  voices:
  speech-config:
  hf-hub-cache:
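
# Usage sketch (assumptions, not part of the stack definition: this file is the
# project's compose file, LLM_API_KEY is exported or set in an .env file, the
# host has the NVIDIA Container Toolkit for the GPU reservations above, and
# nginx forwards host port 11434 to ollama, its default API port):
#
#   LLM_API_KEY=changeme docker compose up -d --build
#   docker compose logs -f llm_provision   # follow the one-shot model pulls
#   curl -H "Authorization: Bearer $LLM_API_KEY" http://localhost:11434/api/tags
#     # hypothetical smoke test listing pulled models; whether the gateway
#     # expects bearer auth depends on src/nginx/nginx.conf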