aboutsummaryrefslogtreecommitdiffstats
path: root/tools/speech.sh
diff options
context:
space:
mode:
author ben 2025-01-15 20:42:44 +0100
committer ben 2025-01-15 20:42:44 +0100
commit 6c1b211260463332f6858fe0a65a733120dfba66 (patch)
tree 9df9bb4b45d37fb01fa1948a557ebed4ba1b495e /tools/speech.sh
parent 947ee3cba20750adf842f366bc85ead31b573b21 (diff)
downloadai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.gz
ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.bz2
ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.xz
Cleaning up scripts for speech functions
Diffstat (limited to 'tools/speech.sh')
-rwxr-xr-xtools/speech.sh98
1 files changed, 98 insertions, 0 deletions
diff --git a/tools/speech.sh b/tools/speech.sh
new file mode 100755
index 0000000..22b5929
--- /dev/null
+++ b/tools/speech.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+SCRIPT=$(readlink -f "$0")
+SCRIPTPATH=$(dirname "$SCRIPT")
+
+commands=("argc" "curl" "ffplay")
+
+for cmd in "${commands[@]}"; do
+ if ! command -v "$cmd" &>/dev/null; then
+ echo "Error: $cmd is required." >&2
+ exit 1
+ fi
+done
+
+# Check for required environment variable
+if [[ -z "${LLM_API_KEY}" ]]; then
+ echo "The environment variable LLM_API_KEY is not set."
+ echo "You can use the following command: 'export \$(xargs < ${SCRIPTPATH}/../.env)'"
+ exit 1
+fi
+
+tts_host=${TTS_API_HOST:-"http://localhost:8000"}
+stt_host=${STT_API_HOST:-"http://localhost:8001"}
+
+_choice_voice() {
+
+ if [[ "${argc_lang}" == "fr" ]]; then
+ echo siwis
+ echo tom
+ echo pierre
+ echo jessica
+ fi
+ if [[ "${argc_lang}" == "en" ]]; then
+ echo alba
+ echo jack
+ echo john
+ echo bryce
+ echo ryan
+ echo echo
+ fi
+}
+
+# @cmd
+# @flag -p --play Play the generated speech
+# @option -l --lang![en|fr] Set the language
+# @option -v --voice![`_choice_voice`] Set the voice
+# @option -s --speed=1.0 Set the speed
+# @option -f --filename=speech.wav Set the output filename
+# @arg text! Set the text
+synthesize() {
+
+ http_status_code=$(curl -s "${tts_host}/v1/audio/speech" -o "${argc_filename}" -w "%{http_code}" \
+ -H "Authorization: Bearer ${LLM_API_KEY}" \
+ -H "Content-Type: application/json" \
+ -d "{\"model\": \"tts-1\",\"input\": \"${argc_text}\",\"voice\": \"${argc_voice}\",\"response_format\": \"wav\",\"speed\": ${argc_speed}}")
+
+ # Check the response code for successful HTTP request
+ if [[ "${http_status_code}" -ne 200 ]]; then
+ echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code"
+ exit 1
+ fi
+
+ if [[ $argc_play -eq 1 ]]; then
+ ffplay "${argc_filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
+ fi
+ echo "Audio file ${argc_filename} generated successfully."
+}
+
+# Print the second whitespace-separated field of every line reported by
+# `pactl list short sources`, one per line. Referenced by the --source
+# option of record as its choice function.
+_choice_source() {
+ pactl list short sources | awk '{print $2}'
+}
+
+_choice_lang() {
+ echo 'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no', 'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'yue' or 'zh'","input":"english","ctx":{"expected":"'af', 'am', 'ar', 'as', 'az', 'ba', 'be', 'bg', 'bn', 'bo', 'br', 'bs', 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fo', 'fr', 'gl', 'gu', 'ha', 'haw', 'he', 'hi', 'hr', 'ht', 'hu', 'hy', 'id', 'is', 'it', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'la', 'lb', 'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'nn', 'no', 'oc', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'sa', 'sd', 'si', 'sk', 'sl', 'sn', 'so', 'sq', 'sr', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tk', 'tl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'yi', 'yo', 'yue', 'zh' | sed 's/, /\n/g'
+}
+
+# @cmd
+# @option -s --source![`_choice_source`] Set the audio source to record from
+# @option -f --filename=record.wav Set the output filename
+record() {
+  # Capture audio from the chosen source with parec and write it as WAV to
+  # ${argc_filename}. parec is called without any duration limit here.
+  parec -d "${argc_source}" --file-format=wav "${argc_filename}"
+}
+
+# @cmd
+# @option -l --lang![`_choice_lang`] Set the language
+# @option -f --filename! Set the output filename
+transcript() {
+ # Transcribe the specified file
+ echo "Transcribing file ${argc_filename}, be patient"
+ curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
+ -F "file=@${argc_filename}" \
+ -F "stream=true" \
+ -F "language=${argc_lang}"
+ echo
+
+}
+
+eval "$(argc --argc-eval "$0" "$@")"