diff options
author | ben | 2025-01-12 14:37:13 +0100 |
---|---|---|
committer | ben | 2025-01-12 14:37:13 +0100 |
commit | 778188ed95ccf50d2e21938bf5b542d76e066f63 (patch) | |
tree | e5138e638da98036e03cb11b2b0cf48fe4c590b2 /tools/tts.sh | |
download | ai_env-778188ed95ccf50d2e21938bf5b542d76e066f63.tar.gz ai_env-778188ed95ccf50d2e21938bf5b542d76e066f63.tar.bz2 ai_env-778188ed95ccf50d2e21938bf5b542d76e066f63.tar.xz |
Initial commit, first public version.
Diffstat (limited to 'tools/tts.sh')
-rwxr-xr-x | tools/tts.sh | 115 |
1 files changed, 115 insertions, 0 deletions
#!/bin/bash
# tools/tts.sh (new file, mode 100755)
#
# Synthesize speech from text: POSTs a JSON request to
# ${TTS_API_HOST}/v1/audio/speech and stores the response body as
# speech_<timestamp>.wav in the current directory, optionally playing it
# with ffplay.
#
# Environment:
#   LLM_API_KEY   (required) token sent as "Authorization: Bearer ...".
#   TTS_API_HOST  (optional) base URL, defaults to http://localhost:8000.

# Print a diagnostic message on stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Display usage information and exit with status 1.
usage() {
  echo "Usage: $0 -l <lang> -v <voice> [-s <speed>] [--play] \"<text>\""
  echo "  -l|--lang   : Specify the language (french|english)"
  echo "  -v|--voice  : Specify the voice"
  echo "  -s|--speed  : Specify the speed (0.0 to 3.0, default is 1.0)"
  echo "  --play      : Play the generated audio file using ffplay"
  echo "  <text>      : The text to synthesize"
  exit 1
}

# Check that a value is a decimal number between 0 and 3.0 (inclusive).
# Arguments: $1 - candidate value
# Returns:   0 when valid, 1 otherwise
# Implemented with string operations only, so it neither depends on bc nor
# overflows on very long digit strings.
is_valid_float() {
  local value=$1
  local sign int_part frac_part

  [[ $value =~ ^(-?)([0-9]+)(\.([0-9]+))?$ ]] || return 1
  sign=${BASH_REMATCH[1]}
  int_part=${BASH_REMATCH[2]}
  frac_part=${BASH_REMATCH[4]}

  # Normalize: drop leading zeros of the integer part and trailing zeros of
  # the fractional part, so "003.000" compares like "3".
  int_part=${int_part#"${int_part%%[!0]*}"}
  frac_part=${frac_part%"${frac_part##*[!0]}"}

  # A negative sign is only acceptable on a zero value ("-0", "-0.0").
  if [[ -n $sign ]]; then
    [[ -z $int_part && -z $frac_part ]]
    return
  fi

  case $int_part in
    '' | 1 | 2) return 0 ;;
    3)
      # Exactly 3 is the upper bound; any non-zero fraction exceeds it.
      [[ -z $frac_part ]]
      return
      ;;
    *) return 1 ;;
  esac
}

# Escape a string so it can be embedded between double quotes in JSON.
# Arguments: $1 - raw string
# Outputs:   the escaped string on stdout (without surrounding quotes)
json_escape() {
  local input=$1
  local output='' char code i

  for ((i = 0; i < ${#input}; i++)); do
    char=${input:i:1}
    case $char in
      '"') output+='\"' ;;
      '\') output+='\\' ;;
      $'\n') output+='\n' ;;
      $'\r') output+='\r' ;;
      $'\t') output+='\t' ;;
      [[:cntrl:]])
        # JSON forbids raw control characters below U+0020.
        printf -v code '%d' "'$char"
        if ((code >= 0 && code < 32)); then
          printf -v char '\\u%04x' "$code"
        fi
        output+=$char
        ;;
      *) output+=$char ;;
    esac
  done
  printf '%s' "$output"
}

# Abort with a usage message when an option is missing its value.
# Arguments: "$@" of the caller, i.e. $1 is the option being parsed.
# Without this guard "shift 2" fails when the option is the last argument,
# leaves the positional parameters untouched and the parsing loop never ends.
require_value() {
  if (($# < 2)); then
    err "Error: Option $1 requires a value."
    usage
  fi
}

main() {
  # Default values
  local speed=1.0
  local host=${TTS_API_HOST:-"http://localhost:8000"}
  local play_audio=false
  local lang='' voice='' text=''
  local timestamp filename payload http_status_code

  # Parse command line arguments
  while (($# > 0)); do
    case $1 in
      -l | --lang)
        require_value "$@"
        lang=$2
        shift 2
        ;;
      -v | --voice)
        require_value "$@"
        voice=$2
        shift 2
        ;;
      -s | --speed)
        require_value "$@"
        speed=$2
        shift 2
        ;;
      --play)
        play_audio=true
        shift
        ;;
      -h | --help)
        usage
        ;;
      --)
        # Explicit end of options: allows a text that starts with "-".
        shift
        break
        ;;
      -*)
        err "Unknown option $1"
        usage
        ;;
      *)
        break
        ;;
    esac
  done

  # Check for the required environment variable (after parsing, so that
  # --help works without an API key).
  if [[ -z "${LLM_API_KEY}" ]]; then
    err "The environment variable LLM_API_KEY is not set."
    err 'You can use the following command: export $(xargs < ../.env)'
    exit 1
  fi

  # Everything left after the options is the text to synthesize.
  if (($# > 0)); then
    text="$*"
  else
    err "Error: Text to synthesize is required."
    usage
  fi

  # Construct the filename with the current date and time
  timestamp=$(date +"%Y%m%d_%H%M%S")
  filename="speech_${timestamp}.wav"

  # Validate language and voice options
  # NOTE(review): lang is required here but is not part of the request
  # payload below - confirm whether the API is expected to receive it.
  if [[ -z "$lang" || -z "$voice" ]]; then
    err "Error: Language (-l) and voice (-v) options are required."
    usage
  fi

  # Check if the speed is valid
  if ! is_valid_float "$speed"; then
    err "Error: Speed must be a float between 0.0 and 3.0."
    exit 1
  fi

  # Build the JSON body; user-supplied strings are escaped so quotes,
  # backslashes or newlines in the text cannot corrupt the request.
  payload=$(printf '{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
    "$(json_escape "$text")" "$(json_escape "$voice")" "$speed")

  # Fetch the audio file from the API
  http_status_code=$(curl -s "${host}/v1/audio/speech" \
    -o "${filename}" \
    -w "%{http_code}" \
    -H "Authorization: Bearer ${LLM_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "${payload}")

  # Check the response code for successful HTTP request
  if [[ "$http_status_code" != "200" ]]; then
    # Whatever was written is an error body, not audio: do not leave it
    # behind under a .wav name.
    rm -f -- "$filename"
    err "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code"
    exit 1
  fi

  # Optionally play the generated WAV file with ffplay
  if [[ "$play_audio" == true ]]; then
    if ! command -v ffplay &>/dev/null; then
      err "Error: ffplay is not installed. Please install ffmpeg to play audio files."
      exit 1
    fi
    ffplay "$filename" -nodisp -nostats -hide_banner -autoexit -v quiet
  fi

  echo "Audio file '$filename' generated successfully."
}

main "$@"