Remove STT feature (not fully offline)

author: ben 2023-03-04 22:22:22 +0100
committer: ben 2025-03-04 21:37:38 +0100
commit: f3eae794ace20d10edc4e970ce6258a47fb3b4d9 (patch)
tree: a663334c0e2716bcd4beaceb29891c0c5cb1468e
parent: 0f09f3fbb4df6849548007de481528974ba55579 (diff)
download: ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.gz
ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.bz2
ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.xz
4 files changed, 9 insertions, 76 deletions
diff --git a/README.md b/README.md
index 7df591c..cc6a3e6 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ No cloud. No GAFAM. Just full privacy, control, and the freedom to manipulate co
 
 * [Ollama](https://ollama.com/) run language models on the local machine.
 * [openedai-speech](https://github.com/matatonic/openedai-speech) provides text-to-speech capability.
-* [speaches-ai](https://github.com/speaches-ai/speaches) provide transcription, translation, and speech generation.
 * [nginx](https://nginx.org/en/) add an authentication to the API.
 * [AIChat](https://github.com/sigoden/aichat) is used as LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI Tools & Agents.
 
@@ -47,6 +46,12 @@ Add an API key to secure server access by adding a `.env` file like this:
 LLM_API_KEY=1234567890
 ```
 
+Create a user and password for the AIChat web UI:
+
+```
+htpasswd -c src/nginx/htpasswd user
+```
+
 Next, start the servers and their configuration with Docker Compose:
 
 ```bash
@@ -92,13 +97,12 @@ aichat -r short "tcp port of mysql"
 
 Go to the [AIChat](https://github.com/sigoden/aichat) website for other possible use cases.
 
-### Text To Speech & Speech To Text
+### Text To Speech
 
-For these two features, use the speech.sh script like this:
+For this feature, use the speech.sh script like this:
 
 ```
 ./speech.sh synthesize --play --lang fr --voice pierre "Bonjour, aujourd'hui nous somme le $(date +%A\ %d\ %B\ %Y)."
-./speech.sh transcript --lang fr --filename speech.wav
 ```
 
 ## How to Use Remotely
@@ -109,11 +113,10 @@ By adding a reverse proxy like Caddy in front of it, you can also add TLS encryp
 
 This way, you can securely use this environment remotely.
 
-To use script tools in a remote context, use the environment variables TTS_API_HOST and STT_API_HOST and modify AIChat config (~/.config/aichat/config.yaml) .
+To use script tools in a remote context, set the environment variable TTS_API_HOST and modify the AIChat config (~/.config/aichat/config.yaml).
 
 Example:
 ```
 export TTS_API_HOST="https://your-remote-domain"
-export STT_API_HOST="https://your-remote-domain"
 ./tools/speech.sh ... 
 ```
diff --git a/docker-compose.yml b/docker-compose.yml
index 0bab263..65638a9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -70,26 +70,6 @@ services:
       timeout: 15s
       retries: 3
 
-  faster-whisper-server:
-    image: fedirz/faster-whisper-server:latest-cuda
-    environment:
-      - WHISPER__MODEL=Systran/faster-whisper-large-v3
-    volumes:
-      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-    healthcheck:
-      test: curl --fail http://localhost:8000 || exit 1
-      interval: 30s
-      timeout: 15s
-      retries: 3
-
   nginx:
     image: nginx
     volumes:
@@ -99,19 +79,16 @@ services:
       - API_KEY=${LLM_API_KEY}
     depends_on:
       - openedai-speech
-      - faster-whisper-server
       - ollama
       - aichat
     links:
       - ollama
-      - faster-whisper-server
       - openedai-speech
       - aichat
     ports:
       - "11434:11434"
       - "8000:8000"
       - "8001:8001"
-      - "8002:8002"
     restart: unless-stopped
 
 volumes:
diff --git a/src/nginx/nginx.conf b/src/nginx/nginx.conf
index 5e411a9..fa4cb13 100644
--- a/src/nginx/nginx.conf
+++ b/src/nginx/nginx.conf
@@ -47,27 +47,6 @@ http {
             return 403;
         }
         location / {
-            proxy_pass http://faster-whisper-server:8000;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_read_timeout 180;
-            proxy_http_version 1.1;
-            proxy_set_header Upgrade $http_upgrade;
-            proxy_set_header Connection "upgrade";
-        }
-    }
-    server {
-        listen 8002;
-        set $deny 1;
-        if ($http_authorization = "Bearer $API_KEY") {
-            set $deny 0;
-        }
-        if ($deny) {
-            return 403;
-        }
-        location / {
             proxy_pass http://aichat:8000;
             proxy_set_header Host $host;
             proxy_set_header X-Real-IP $remote_addr;
diff --git a/tools/speech.sh b/tools/speech.sh
index 1b62fbe..8ad7de2 100755
--- a/tools/speech.sh
+++ b/tools/speech.sh
@@ -24,7 +24,6 @@ if [[ -z "${LLM_API_KEY}" ]]; then
 fi
 
 tts_host=${TTS_API_HOST:-"http://localhost:8000"}
-stt_host=${STT_API_HOST:-"http://localhost:8001"}
 
 _choice_voice() {
 
@@ -70,33 +69,8 @@ synthesize() {
 	echo "Audio file ${argc_filename} generated successfully."
 }
 
-_choice_source() {
-	pactl list short sources | awk '{print $2}'
-}
-
 _choice_lang() {
 	echo 'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no', 'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'yue', 'zh' | sed 's/, /\n/g'
 }
 
-# @cmd
-# @option     -s --source![`_choice_source`]     Set the voice
-# @option     -f --filename=record.wav            Set the output filename
-record() {
-	parec -d "${argc_source}" --file-format=wav "${argc_filename}"
-}
-
-# @cmd
-# @option     -l --lang![`_choice_lang`]                                        Set the language
-# @option     -f --filename!                            Set the output filename
-transcript() {
-	# Transcribe the specified file
-	echo "Transcribing file ${argc_filename}, be patient"
-	curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
-		-F "file=@${argc_filename}" \
-		-F "stream=true" \
-		-F "language=${argc_lang}"
-	echo
-
-}
-
 eval "$(argc --argc-eval "$0" "$@")"
author	ben	2023-03-04 22:22:22 +0100
committer	ben	2025-03-04 21:37:38 +0100
commit	f3eae794ace20d10edc4e970ce6258a47fb3b4d9 (patch)
tree	a663334c0e2716bcd4beaceb29891c0c5cb1468e
parent	0f09f3fbb4df6849548007de481528974ba55579 (diff)
download	ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.gz ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.bz2 ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.xz