Cleaning up scripts for speech functions

author: ben 2025-01-15 20:42:44 +0100
committer: ben 2025-01-15 20:42:44 +0100
commit: 6c1b211260463332f6858fe0a65a733120dfba66 (patch)
tree: 9df9bb4b45d37fb01fa1948a557ebed4ba1b495e /tools/speech.sh
parent: 947ee3cba20750adf842f366bc85ead31b573b21 (diff)
download: ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.gz
ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.bz2
ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.xz
1 file changed, 98 insertions, 0 deletions
diff --git a/tools/speech.sh b/tools/speech.sh
new file mode 100755
index 0000000..22b5929
--- /dev/null
+++ b/tools/speech.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+SCRIPT=$(readlink -f "$0")
+SCRIPTPATH=$(dirname "$SCRIPT")
+
+# Refuse to start unless argc, curl and ffplay are all found by `command -v`.
+commands=("argc" "curl" "ffplay")
+for cmd in "${commands[@]}"; do
+	command -v "$cmd" &>/dev/null && continue
+	# First missing program: report it on stderr and stop.
+	echo "Error: $cmd is required." >&2
+	exit 1
+done
+
+# LLM_API_KEY is sent as the Bearer token of the API requests; stop if it is empty.
+if [[ -z "${LLM_API_KEY}" ]]; then
+	echo "The environment variable LLM_API_KEY is not set." >&2
+	echo "You can use the following command: 'export \$(xargs < ${SCRIPTPATH}/../.env)'" >&2
+	exit 1
+fi
+
+tts_host=${TTS_API_HOST:-"http://localhost:8000"} # base URL used by synthesize
+stt_host=${STT_API_HOST:-"http://localhost:8001"} # base URL used by transcript
+
+# List the voice names offered for the language held in ${argc_lang}.
+# Prints one name per line; prints nothing when the language is not fr or en.
+# Referenced by the `--voice` choice of the synthesize command.
+_choice_voice() {
+	case "${argc_lang}" in
+	fr)
+		printf '%s\n' \
+			siwis tom \
+			pierre jessica
+		;;
+	en)
+		printf '%s\n' \
+			alba jack john \
+			bryce ryan echo
+		;;
+	esac
+}
+
+# @cmd
+# @flag       -p --play  						Play the generated speech
+# @option     -l --lang![en|fr]	 			 Set the language
+# @option     -v --voice![`_choice_voice`] 	  Set the voice
+# @option     -s --speed=1.0 					Set the speed
+# @option     -f --filename=speech.wav 		  Set the output filename
+# @arg         text!                 			Set the text
+synthesize() {
+	# Request speech for the text from ${tts_host}, store the reply in ${argc_filename}, play it when --play is set.
+	# The text is JSON-escaped first (backslash, double quote, newline, CR, tab) so it cannot break the request body.
+	local text=${argc_text//\\/\\\\}
+	text=${text//\"/\\\"}; text=${text//$'\n'/\\n}; text=${text//$'\r'/\\r}; text=${text//$'\t'/\\t}
+	http_status_code=$(curl -s "${tts_host}/v1/audio/speech" -o "${argc_filename}" -w "%{http_code}" \
+		-H "Authorization: Bearer ${LLM_API_KEY}" \
+		-H "Content-Type: application/json" \
+		-d "{\"model\": \"tts-1\",\"input\": \"${text}\",\"voice\": \"${argc_voice}\",\"response_format\": \"wav\",\"speed\": ${argc_speed}}")
+	if [[ "${http_status_code}" -ne 200 ]]; then
+		echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
+		exit 1
+	fi
+	if [[ $argc_play -eq 1 ]]; then
+		ffplay "${argc_filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
+	fi
+	echo "Audio file ${argc_filename} generated successfully."
+}
+
+_choice_source() { # print the second column of each `pactl list short sources` line
+	awk '{ print $2 }' < <(pactl list short sources)
+}
+
+_choice_lang() { # one language code per line, offered as the choices of transcript --lang
+	printf '%s\n' af am ar as az ba be bg bn bo br bs ca cs cy da de el en es et eu fa fi fo fr gl gu ha haw he hi hr ht hu hy id is it ja jw ka kk km kn ko la lb ln lo lt lv mg mi mk ml mn mr ms mt my ne nl nn no oc pa pl ps pt ro ru sa sd si sk sl sn so sq sr su sv sw ta te tg th tk tl tr tt uk ur uz vi yi yo yue zh
+}
+
+# @cmd
+# @option     -s --source![`_choice_source`] 	 Set the source to record from
+# @option     -f --filename=record.wav 		  Set the output filename
+record() { # have parec capture from the chosen source into a WAV file
+	parec -d "${argc_source}" --file-format=wav "${argc_filename}"
+}
+
+# @cmd
+# @option     -l --lang![`_choice_lang`]				 	Set the language
+# @option     -f --filename!		 		Set the audio file to transcribe
+transcript() {
+	# Upload ${argc_filename} as a multipart form to ${stt_host}/v1/audio/transcriptions;
+	# curl prints the response on stdout.
+	echo "Transcribing file ${argc_filename}, be patient"
+	curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
+		-F "file=@${argc_filename}" \
+		-F "stream=true" \
+		-F "language=${argc_lang}"
+	echo # emit a trailing newline after curl's output
+}
+
+eval "$(argc --argc-eval "$0" "$@")" # run the shell code argc emits for this script and its arguments
author	ben	2025-01-15 20:42:44 +0100
committer	ben	2025-01-15 20:42:44 +0100
commit	6c1b211260463332f6858fe0a65a733120dfba66 (patch)
tree	9df9bb4b45d37fb01fa1948a557ebed4ba1b495e /tools/speech.sh
parent	947ee3cba20750adf842f366bc85ead31b573b21 (diff)
download	ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.gz ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.bz2 ai_env-6c1b211260463332f6858fe0a65a733120dfba66.tar.xz