diff options
-rw-r--r-- | README.md | 15 | ||||
-rw-r--r-- | docker-compose.yml | 23 | ||||
-rw-r--r-- | src/nginx/nginx.conf | 21 | ||||
-rwxr-xr-x | tools/speech.sh | 26 |
4 files changed, 9 insertions, 76 deletions
@@ -12,7 +12,6 @@ No cloud. No GAFAM. Just full privacy, control, and the freedom to manipulate co * [Ollama](https://ollama.com/) run language models on the local machine. * [openedai-speech](https://github.com/matatonic/openedai-speech) provides text-to-speech capability. -* [speaches-ai](https://github.com/speaches-ai/speaches) provide transcription, translation, and speech generation. * [nginx](https://nginx.org/en/) add an authentication to the API. * [AIChat](https://github.com/sigoden/aichat) is used as LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI Tools & Agents. @@ -47,6 +46,12 @@ Add an API key to secure server access by adding a `.env` file like this: LLM_API_KEY=1234567890 ``` +Create user credentials for the AIChat web UI: + +``` +htpasswd -c src/nginx/htpasswd user +``` + Next, start the servers and their configuration with Docker Compose: ```bash @@ -92,13 +97,12 @@ aichat -r short "tcp port of mysql" Go to the [AIChat](https://github.com/sigoden/aichat) website for other possible use cases. -### Text To Speech & Speech To Text +### Text To Speech -For these two features, use the speech.sh script like this: +For this feature, use the speech.sh script like this: ``` ./speech.sh synthesize --play --lang fr --voice pierre "Bonjour, aujourd'hui nous somme le $(date +%A\ %d\ %B\ %Y)." -./speech.sh transcript --lang fr --filename speech.wav ``` ## How to Use Remotely @@ -109,11 +113,10 @@ By adding a reverse proxy like Caddy in front of it, you can also add TLS encryp This way, you can securely use this environment remotely. -To use script tools in a remote context, use the environment variables TTS_API_HOST and STT_API_HOST and modify AIChat config (~/.config/aichat/config.yaml) . +To use script tools in a remote context, use the environment variable TTS_API_HOST and modify AIChat config (~/.config/aichat/config.yaml). 
Example: ``` export TTS_API_HOST="https://your-remote-domain" -export STT_API_HOST="https://your-remote-domain" ./tools/speech.sh ... ``` diff --git a/docker-compose.yml b/docker-compose.yml index 0bab263..65638a9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -70,26 +70,6 @@ services: timeout: 15s retries: 3 - faster-whisper-server: - image: fedirz/faster-whisper-server:latest-cuda - environment: - - WHISPER__MODEL=Systran/faster-whisper-large-v3 - volumes: - - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - restart: unless-stopped - healthcheck: - test: curl --fail http://localhost:8000 || exit 1 - interval: 30s - timeout: 15s - retries: 3 - nginx: image: nginx volumes: @@ -99,19 +79,16 @@ services: - API_KEY=${LLM_API_KEY} depends_on: - openedai-speech - - faster-whisper-server - ollama - aichat links: - ollama - - faster-whisper-server - openedai-speech - aichat ports: - "11434:11434" - "8000:8000" - "8001:8001" - - "8002:8002" restart: unless-stopped volumes: diff --git a/src/nginx/nginx.conf b/src/nginx/nginx.conf index 5e411a9..fa4cb13 100644 --- a/src/nginx/nginx.conf +++ b/src/nginx/nginx.conf @@ -47,27 +47,6 @@ http { return 403; } location / { - proxy_pass http://faster-whisper-server:8000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_read_timeout 180; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - } - } - server { - listen 8002; - set $deny 1; - if ($http_authorization = "Bearer $API_KEY") { - set $deny 0; - } - if ($deny) { - return 403; - } - location / { proxy_pass http://aichat:8000; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; diff --git a/tools/speech.sh b/tools/speech.sh index 1b62fbe..8ad7de2 
100755 --- a/tools/speech.sh +++ b/tools/speech.sh @@ -24,7 +24,6 @@ if [[ -z "${LLM_API_KEY}" ]]; then fi tts_host=${TTS_API_HOST:-"http://localhost:8000"} -stt_host=${STT_API_HOST:-"http://localhost:8001"} _choice_voice() { @@ -70,33 +69,8 @@ synthesize() { echo "Audio file ${argc_filename} generated successfully." } -_choice_source() { - pactl list short sources | awk '{print $2}' -} - _choice_lang() { echo 'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no', 'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'yue', 'zh' | sed 's/, /\n/g' } -# @cmd -# @option -s --source![`_choice_source`] Set the voice -# @option -f --filename=record.wav Set the output filename -record() { - parec -d "${argc_source}" --file-format=wav "${argc_filename}" -} - -# @cmd -# @option -l --lang![`_choice_lang`] Set the language -# @option -f --filename! Set the output filename -transcript() { - # Transcribe the specified file - echo "Transcribing file ${argc_filename}, be patient" - curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \ - -F "file=@${argc_filename}" \ - -F "stream=true" \ - -F "language=${argc_lang}" - echo - -} - eval "$(argc --argc-eval "$0" "$@")" |