#!/bin/bash
#
# speech.sh - argc-based helper for text-to-speech, recording and
# speech-to-text against HTTP speech endpoints.
#
# Recovered from the patch that created tools/speech.sh:
#   commit 6c1b211260463332f6858fe0a65a733120dfba66
#   author ben, Wed, 15 Jan 2025 20:42:44 +0100
#   subject "Cleaning up scripts for speech functions"
#
# Sub-commands:
#   synthesize  POST text to ${TTS_API_HOST}/v1/audio/speech, save a WAV file
#   record      capture audio from a PulseAudio source with parec
#   transcript  POST an audio file to ${STT_API_HOST}/v1/audio/transcriptions
#
# Environment:
#   LLM_API_KEY   (required) bearer token sent with every HTTP request
#   TTS_API_HOST  (optional) defaults to http://localhost:8000
#   STT_API_HOST  (optional) defaults to http://localhost:8001

SCRIPT=$(readlink -f "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

# Tools every invocation depends on (argc drives the CLI itself).
commands=("argc" "curl" "ffplay")

for cmd in "${commands[@]}"; do
  if ! command -v "$cmd" &>/dev/null; then
    echo "Error: $cmd is required." >&2
    exit 1
  fi
done

# Check for required environment variable
if [[ -z "${LLM_API_KEY}" ]]; then
  {
    echo "The environment variable LLM_API_KEY is not set."
    echo "You can use the following command: 'export \$(xargs < ${SCRIPTPATH}/../.env)'"
  } >&2
  exit 1
fi

tts_host=${TTS_API_HOST:-"http://localhost:8000"}
stt_host=${STT_API_HOST:-"http://localhost:8001"}

# Escape a string so it can be embedded between double quotes in a JSON
# document. Handles backslash, double quote and the common control
# characters; other control characters below 0x20 are passed through as-is.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
_json_escape() {
  local s=$1
  s=${s//\\/\\\\} # must run first so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//$'\b'/\\b}
  s=${s//$'\f'/\\f}
  printf '%s' "$s"
}

# Choice function for `synthesize --voice`: prints the voices available for
# the language currently held in argc_lang, one per line. Prints nothing for
# any other language.
_choice_voice() {
  case "${argc_lang}" in
    fr)
      printf '%s\n' siwis tom pierre jessica
      ;;
    en)
      printf '%s\n' alba jack john bryce ryan echo
      ;;
  esac
}

# @cmd Generate a speech audio file from text
# @flag -p --play                                   Play the generated speech
# @option -l --lang![en|fr]                         Set the language
# @option -v --voice![`_choice_voice`]              Set the voice
# @option -s --speed=1.0                            Set the speed
# @option -f --filename=speech.wav                  Set the output filename
# @arg text!                                        Set the text
synthesize() {
  # Sends the text to the TTS endpoint, writes the response body to
  # argc_filename and optionally plays it. Exits 1 on an invalid speed or
  # when the server does not answer with HTTP 200.
  local http_status_code payload

  # The speed is inserted into the JSON body as a bare number, so it must be
  # a plain decimal literal.
  if [[ ! "${argc_speed}" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    echo "Error: invalid speed '${argc_speed}', expected a number such as 1.0" >&2
    exit 1
  fi

  # Build the body with escaped strings so quotes, backslashes or newlines
  # in the text cannot break (or inject into) the JSON document.
  payload=$(printf '{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
    "$(_json_escape "${argc_text}")" \
    "$(_json_escape "${argc_voice}")" \
    "${argc_speed}")

  # -o stores the body in the output file; -w prints only the status code,
  # which is what gets captured here ("000" when the request itself failed).
  http_status_code=$(curl -s "${tts_host}/v1/audio/speech" -o "${argc_filename}" -w "%{http_code}" \
    -H "Authorization: Bearer ${LLM_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "${payload}")

  # Check the response code for successful HTTP request
  if [[ "${http_status_code}" != "200" ]]; then
    echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
    exit 1
  fi

  if [[ "${argc_play:-0}" -eq 1 ]]; then
    ffplay "${argc_filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
  fi
  echo "Audio file ${argc_filename} generated successfully."
}

# Choice function for `record --source`: prints the second column of
# `pactl list short sources`, one entry per line.
_choice_source() {
  pactl list short sources | awk '{print $2}'
}

# Choice function for `transcript --lang`: prints the accepted language
# codes, one per line.
_choice_lang() {
  printf '%s\n' \
    af am ar as az ba be bg bn bo br bs ca cs cy da de el en es \
    et eu fa fi fo fr gl gu ha haw he hi hr ht hu hy id is it ja \
    jw ka kk km kn ko la lb ln lo lt lv mg mi mk ml mn mr ms mt \
    my ne nl nn no oc pa pl ps pt ro ru sa sd si sk sl sn so sq \
    sr su sv sw ta te tg th tk tl tr tt uk ur uz vi yi yo yue zh
}

# @cmd Record audio from a PulseAudio source
# @option -s --source![`_choice_source`]            Set the audio source
# @option -f --filename=record.wav                  Set the output filename
record() {
  # parec is only needed by this sub-command, so it is checked here rather
  # than in the global dependency list.
  if ! command -v parec &>/dev/null; then
    echo "Error: parec is required." >&2
    exit 1
  fi
  parec -d "${argc_source}" --file-format=wav "${argc_filename}"
}

# @cmd Transcribe an audio file
# @option -l --lang![`_choice_lang`]                Set the language
# @option -f --filename!                            Set the audio file to transcribe
transcript() {
  # Uploads argc_filename as multipart form data and prints the streamed
  # response as curl receives it. Returns curl's exit status.
  local rc=0

  if [[ ! -r "${argc_filename}" ]]; then
    echo "Error: cannot read audio file: ${argc_filename}" >&2
    exit 1
  fi

  # Transcribe the specified file
  echo "Transcribing file ${argc_filename}, be patient"
  curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
    -F "file=@${argc_filename}" \
    -F "stream=true" \
    -F "language=${argc_lang}" || rc=$?
  # Terminate the streamed output with a newline.
  echo

  return "${rc}"
}

eval "$(argc --argc-eval "$0" "$@")"