blob: 7cdcfdd220ef6ee8c5d2fc0f71695b963b84bf6b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
# Service definitions for the stack: an Ollama server, a text-to-speech
# server, a faster-whisper server, two build/provisioning helpers, and an
# nginx container that publishes the host ports.
services:
  # Ollama server. Its data directory lives in the `ollama` named volume so
  # it survives container re-creation.
  ollama:
    image: ollama/ollama
    volumes:
      - ollama:/root/.ollama
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      # A string `test` is executed through the container's shell.
      # Healthy only if both the CLI runs and `ollama ps` succeeds.
      test: ollama --version && ollama ps || exit 1
      interval: 60s
      retries: 5
      start_period: 20s
      timeout: 10s

  # Text-to-speech server built from src/tts/Dockerfile.
  openedai-speech:
    build:
      # Explicit build context (the project directory, which is also the
      # Compose default) so the Dockerfile path below is unambiguous.
      context: .
      dockerfile: src/tts/Dockerfile
    environment:
      - TTS_HOME=voices
    volumes:
      - voices:/app/voices
      - speech-config:/app/config
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      # Healthy when an HTTP GET on port 8000 inside the container returns
      # a non-error status (`--fail` turns HTTP errors into a non-zero exit).
      test: curl --fail http://localhost:8000 || exit 1
      interval: 60s
      retries: 5
      start_period: 10s
      timeout: 10s

  # Provisioning helper built from src/llm_provision/Dockerfile.
  llm_provision:
    build:
      context: .
      dockerfile: src/llm_provision/Dockerfile
    environment:
      # Comma-separated model tags handed to the provisioning image.
      # NOTE(review): presumably these are pulled into Ollama - confirm in
      # src/llm_provision.
      - MODELS=qwen2.5:latest,qwen2.5-coder:32b,nomic-embed-text:latest
    # Quoted on purpose: a bare `no` is a YAML 1.1 boolean, while Compose
    # expects the restart policy as the string "no".
    restart: "no"
    depends_on:
      ollama:
        # Start only once the ollama healthcheck passes; `restart: true`
        # asks Compose to restart this service when it updates ollama.
        condition: service_healthy
        restart: true
    links:
      - ollama

  # Build-only service for the image defined in src/aichat/Dockerfile.
  aichat-build:
    build:
      context: .
      dockerfile: src/aichat/Dockerfile
    # Quoted for the same reason as above: the policy is the string "no".
    restart: "no"

  # faster-whisper server (CUDA image) with the Hugging Face hub cache kept
  # in a named volume.
  faster-whisper-server:
    image: fedirz/faster-whisper-server:latest-cuda
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-large-v3
    volumes:
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      # Healthy when a TCP connection to 127.0.0.1:8000 can be opened from
      # inside the container (uses bash's /dev/tcp, so no curl is needed).
      test: timeout 10s bash -c ':> /dev/tcp/127.0.0.1/8000' || exit 1
      interval: 30s
      timeout: 15s
      retries: 3

  # nginx front end; the only service that publishes ports on the host.
  nginx:
    image: nginx
    volumes:
      # Mounted as a template: the official nginx image renders
      # /etc/nginx/templates/*.template with envsubst at start-up.
      - ./src/nginx/nginx.conf:/etc/nginx/templates/nginx.conf.template
    environment:
      # Rendered templates are written here, i.e. to /etc/nginx/nginx.conf.
      - NGINX_ENVSUBST_OUTPUT_DIR=/etc/nginx
      # Interpolated by Compose from the shell environment or .env file.
      # NOTE(review): this is an empty string when LLM_API_KEY is unset;
      # consider `${LLM_API_KEY:?}` if a key must always be present.
      - API_KEY=${LLM_API_KEY}
    # Short form: only orders start-up, it does not wait for healthchecks.
    depends_on:
      - openedai-speech
      - faster-whisper-server
      - ollama
    links:
      - ollama
      - faster-whisper-server
      - openedai-speech
    ports:
      # NOTE(review): which port fronts which backend is defined in
      # src/nginx/nginx.conf, not here.
      - "11434:11434"
      - "8000:8000"
      - "8001:8001"
    restart: unless-stopped

# Named volumes, all created with Docker's default driver and options.
volumes:
  # Mounted at /root/.ollama in the ollama service.
  ollama: {}
  # Mounted at /app/voices in the openedai-speech service.
  voices: {}
  # Mounted at /app/config in the openedai-speech service.
  speech-config: {}
  # Mounted at /home/ubuntu/.cache/huggingface/hub in faster-whisper-server.
  hf-hub-cache: {}
|