aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorben2023-03-04 22:22:22 +0100
committerben2025-03-04 21:37:38 +0100
commitf3eae794ace20d10edc4e970ce6258a47fb3b4d9 (patch)
treea663334c0e2716bcd4beaceb29891c0c5cb1468e
parent0f09f3fbb4df6849548007de481528974ba55579 (diff)
downloadai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.gz
ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.bz2
ai_env-f3eae794ace20d10edc4e970ce6258a47fb3b4d9.tar.xz
Remove STT feature (not fully offline)
-rw-r--r--README.md15
-rw-r--r--docker-compose.yml23
-rw-r--r--src/nginx/nginx.conf21
-rwxr-xr-xtools/speech.sh26
4 files changed, 9 insertions, 76 deletions
diff --git a/README.md b/README.md
index 7df591c..cc6a3e6 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ No cloud. No GAFAM. Just full privacy, control, and the freedom to manipulate co
* [Ollama](https://ollama.com/) run language models on the local machine.
* [openedai-speech](https://github.com/matatonic/openedai-speech) provides text-to-speech capability.
-* [speaches-ai](https://github.com/speaches-ai/speaches) provide transcription, translation, and speech generation.
* [nginx](https://nginx.org/en/) add an authentication to the API.
* [AIChat](https://github.com/sigoden/aichat) is used as LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI Tools & Agents.
@@ -47,6 +46,12 @@ Add an API key to secure server access by adding a `.env` file like this:
LLM_API_KEY=1234567890
```
+Create user credentials for the AIChat web UI:
+
+```
+htpasswd -c src/nginx/htpasswd user
+```
+
Next, start the servers and their configuration with Docker Compose:
```bash
@@ -92,13 +97,12 @@ aichat -r short "tcp port of mysql"
Go to the [AIChat](https://github.com/sigoden/aichat) website for other possible use cases.
-### Text To Speech & Speech To Text
+### Text To Speech
-For these two features, use the speech.sh script like this:
+For this feature, use the speech.sh script like this:
```
./speech.sh synthesize --play --lang fr --voice pierre "Bonjour, aujourd'hui nous somme le $(date +%A\ %d\ %B\ %Y)."
-./speech.sh transcript --lang fr --filename speech.wav
```
## How to Use Remotely
@@ -109,11 +113,10 @@ By adding a reverse proxy like Caddy in front of it, you can also add TLS encryp
This way, you can securely use this environment remotely.
-To use script tools in a remote context, use the environment variables TTS_API_HOST and STT_API_HOST and modify AIChat config (~/.config/aichat/config.yaml) .
+To use the script tools in a remote context, set the environment variable TTS_API_HOST and modify the AIChat config (~/.config/aichat/config.yaml).
Example:
```
export TTS_API_HOST="https://your-remote-domain"
-export STT_API_HOST="https://your-remote-domain"
./tools/speech.sh ...
```
diff --git a/docker-compose.yml b/docker-compose.yml
index 0bab263..65638a9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -70,26 +70,6 @@ services:
timeout: 15s
retries: 3
- faster-whisper-server:
- image: fedirz/faster-whisper-server:latest-cuda
- environment:
- - WHISPER__MODEL=Systran/faster-whisper-large-v3
- volumes:
- - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: all
- capabilities: [gpu]
- restart: unless-stopped
- healthcheck:
- test: curl --fail http://localhost:8000 || exit 1
- interval: 30s
- timeout: 15s
- retries: 3
-
nginx:
image: nginx
volumes:
@@ -99,19 +79,16 @@ services:
- API_KEY=${LLM_API_KEY}
depends_on:
- openedai-speech
- - faster-whisper-server
- ollama
- aichat
links:
- ollama
- - faster-whisper-server
- openedai-speech
- aichat
ports:
- "11434:11434"
- "8000:8000"
- "8001:8001"
- - "8002:8002"
restart: unless-stopped
volumes:
diff --git a/src/nginx/nginx.conf b/src/nginx/nginx.conf
index 5e411a9..fa4cb13 100644
--- a/src/nginx/nginx.conf
+++ b/src/nginx/nginx.conf
@@ -47,27 +47,6 @@ http {
return 403;
}
location / {
- proxy_pass http://faster-whisper-server:8000;
- proxy_set_header Host $host;
- proxy_set_header X-Real-IP $remote_addr;
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
- proxy_set_header X-Forwarded-Proto $scheme;
- proxy_read_timeout 180;
- proxy_http_version 1.1;
- proxy_set_header Upgrade $http_upgrade;
- proxy_set_header Connection "upgrade";
- }
- }
- server {
- listen 8002;
- set $deny 1;
- if ($http_authorization = "Bearer $API_KEY") {
- set $deny 0;
- }
- if ($deny) {
- return 403;
- }
- location / {
proxy_pass http://aichat:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
diff --git a/tools/speech.sh b/tools/speech.sh
index 1b62fbe..8ad7de2 100755
--- a/tools/speech.sh
+++ b/tools/speech.sh
@@ -24,7 +24,6 @@ if [[ -z "${LLM_API_KEY}" ]]; then
fi
tts_host=${TTS_API_HOST:-"http://localhost:8000"}
-stt_host=${STT_API_HOST:-"http://localhost:8001"}
_choice_voice() {
@@ -70,33 +69,8 @@ synthesize() {
echo "Audio file ${argc_filename} generated successfully."
}
-_choice_source() {
- pactl list short sources | awk '{print $2}'
-}
-
_choice_lang() {
echo 'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no', 'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'yue', 'zh' | sed 's/, /\n/g'
}
-# @cmd
-# @option -s --source![`_choice_source`] Set the voice
-# @option -f --filename=record.wav Set the output filename
-record() {
- parec -d "${argc_source}" --file-format=wav "${argc_filename}"
-}
-
-# @cmd
-# @option -l --lang![`_choice_lang`] Set the language
-# @option -f --filename! Set the output filename
-transcript() {
- # Transcribe the specified file
- echo "Transcribing file ${argc_filename}, be patient"
- curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
- -F "file=@${argc_filename}" \
- -F "stream=true" \
- -F "language=${argc_lang}"
- echo
-
-}
-
eval "$(argc --argc-eval "$0" "$@")"