aboutsummaryrefslogtreecommitdiffstats
path: root/tools/tts.sh
diff options
context:
space:
mode:
Diffstat (limited to 'tools/tts.sh')
-rwxr-xr-xtools/tts.sh115
1 files changed, 115 insertions, 0 deletions
diff --git a/tools/tts.sh b/tools/tts.sh
new file mode 100755
index 0000000..2065a3d
--- /dev/null
+++ b/tools/tts.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+# Print the command-line help and terminate the script.
+#
+# Arguments:
+#   $1 - exit status to terminate with (optional; defaults to 1, so callers
+#        that pass nothing keep failing as before)
+# Outputs:
+#   The help text, on stdout when the status is 0 and on stderr otherwise.
+# Returns:
+#   Never returns; always exits with the chosen status.
+usage() {
+  local status=${1:-1}
+  local fd=2
+  # Help that was asked for is regular output; help shown because of a
+  # mistake is a diagnostic and belongs on stderr.
+  if [[ $status == 0 ]]; then
+    fd=1
+  fi
+  printf '%s\n' \
+    "Usage: $0 -l <lang> -v <voice> -s <speed> [--play] \"<text>\"" \
+    " -l|--lang : Specify the language (french|english)" \
+    " -v|--voice : Specify the voice" \
+    " -s|--speed : Specify the speed (0.0 to 3.0, default is 1.0)" \
+    " --play : Play the generated audio file using ffplay" \
+    " <text> : The text to synthesize" >&"$fd"
+  exit "$status"
+}
+
+# Check that a string is a decimal number in the closed range [0, 3.0].
+#
+# The check is done with pattern matching only, so it needs no external
+# calculator and is not affected by leading zeros or very long digit runs.
+#
+# Arguments:
+#   $1 - candidate value: optional "-", digits, optional "." plus digits
+# Returns:
+#   0 when the value is a number between 0 and 3.0 inclusive, 1 otherwise
+is_valid_float() {
+  local value=$1
+  local sign int_part frac_part
+
+  [[ $value =~ ^(-?)([0-9]+)(\.([0-9]+))?$ ]] || return 1
+  sign=${BASH_REMATCH[1]}
+  int_part=${BASH_REMATCH[2]}
+  frac_part=${BASH_REMATCH[4]}
+
+  # Normalise: drop leading zeros of the integer part and trailing zeros of
+  # the fraction, so "0", "00" and ".000" all become the empty string.
+  int_part=${int_part#"${int_part%%[!0]*}"}
+  frac_part=${frac_part%"${frac_part##*[!0]}"}
+
+  # A minus sign is syntactically accepted, but only negative zero is >= 0.
+  if [[ -n $sign ]]; then
+    [[ -z $int_part && -z $frac_part ]]
+    return
+  fi
+
+  case $int_part in
+    '' | 1 | 2)
+      # 0.x, 1.x and 2.x are always inside the range.
+      return 0
+      ;;
+    3)
+      # Exactly 3 is the upper bound: any non-zero fraction exceeds it.
+      [[ -z $frac_part ]]
+      return
+      ;;
+    *)
+      return 1
+      ;;
+  esac
+}
+
+# Abort early when the API key is missing or empty: the speech request sends
+# it as a bearer token, so nothing useful can happen without it.
+if [[ -z "${LLM_API_KEY:-}" ]]; then
+  {
+    echo "The environment variable LLM_API_KEY is not set."
+    # Single quotes keep the $(...) literal: the hint is printed, not run.
+    echo 'You can use the following command: export $(xargs < ../.env)'
+  } >&2
+  exit 1
+fi
+
+# Defaults used when the matching option is not given on the command line.
+# The API base URL comes from TTS_API_HOST when that variable is set and
+# non-empty, and falls back to a local server otherwise.
+play_audio=false
+speed=1.0
+host="${TTS_API_HOST:-http://localhost:8000}"
+
+# Parse command line arguments.
+# Options are consumed from the front of "$@". Parsing stops at "--" or at the
+# first word that does not start with a dash; whatever is left is the text.
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    -l | --lang | -v | --voice | -s | --speed)
+      # A value-taking option given as the last word would make "shift 2"
+      # fail without consuming anything, and the loop would never end.
+      if [[ $# -lt 2 ]]; then
+        echo "Error: Option $1 requires a value." >&2
+        usage
+      fi
+      case $1 in
+        -l | --lang) lang="$2" ;;
+        -v | --voice) voice="$2" ;;
+        -s | --speed) speed="$2" ;;
+      esac
+      shift 2
+      ;;
+    --play)
+      play_audio=true
+      shift
+      ;;
+    -h | --help)
+      # usage terminates the shell it runs in, so run it in a subshell and
+      # exit successfully afterwards: asking for help is not an error.
+      (usage 0)
+      exit 0
+      ;;
+    --)
+      # Explicit end of options, so the text itself may begin with a dash.
+      shift
+      break
+      ;;
+    -*)
+      echo "Unknown option $1" >&2
+      usage
+      ;;
+    *)
+      break
+      ;;
+  esac
+done
+
+# The text to synthesize is mandatory. When no positional argument is left
+# after option parsing, report it and show the usage (which exits); otherwise
+# join all remaining words into one string (separated by the first character
+# of IFS, a space by default).
+if [[ $# -eq 0 ]]; then
+  echo "Error: Text to synthesize is required."
+  usage
+fi
+text="$*"
+
+# Name the output file after the current date and time, in the form
+# speech_YYYYmmdd_HHMMSS.wav, relative to the current directory.
+timestamp="$(date '+%Y%m%d_%H%M%S')"
+filename="speech_${timestamp}.wav"
+
+# Both the language and the voice must have been supplied with a non-empty
+# value; otherwise report it and show the usage, which ends the script.
+if ! [[ -n "$lang" && -n "$voice" ]]; then
+  echo "Error: Language (-l) and voice (-v) options are required."
+  usage
+fi
+
+# Stop with an error unless the requested speed passes is_valid_float,
+# i.e. it is a number inside the accepted 0.0-3.0 range.
+is_valid_float "$speed" || {
+  echo "Error: Speed must be a float between 0.0 and 3.0."
+  exit 1
+}
+
+# Escape a string so it can sit between double quotes in a JSON document.
+# Handles backslash, double quote, newline, carriage return and tab; any
+# other control character is passed through unchanged.
+# Arguments:
+#   $1 - raw string
+# Outputs:
+#   The escaped string on stdout, without a trailing newline.
+json_escape() {
+  local raw=$1
+  # Backslashes first, otherwise the escapes added below would be doubled.
+  raw=${raw//\\/\\\\}
+  raw=${raw//\"/\\\"}
+  raw=${raw//$'\n'/\\n}
+  raw=${raw//$'\r'/\\r}
+  raw=${raw//$'\t'/\\t}
+  printf '%s' "$raw"
+}
+
+# Build the request body. The text and the voice come straight from the
+# command line, so they are escaped before being placed inside JSON strings;
+# the speed is inserted as a bare JSON number.
+payload=$(printf '{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
+  "$(json_escape "$text")" "$(json_escape "$voice")" "$speed")
+
+# Fetch the audio file from the API. curl stores the response body in
+# ${filename} and prints only the HTTP status code, which is captured here.
+http_status_code=$(curl -s "${host}/v1/audio/speech" \
+  -o "${filename}" \
+  -w "%{http_code}" \
+  -H "Authorization: Bearer ${LLM_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d "${payload}")
+
+# Anything other than 200 is a failure. In that case whatever curl saved is
+# not audio, so it is removed rather than left behind under a .wav name.
+if [[ "$http_status_code" != 200 ]]; then
+  rm -f -- "${filename}"
+  echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
+  exit 1
+fi
+
+# Optionally play the generated WAV file with ffplay (no video window, no
+# banner or statistics, exiting as soon as playback ends).
+if [[ "$play_audio" == true ]]; then
+  if ! command -v ffplay &>/dev/null; then
+    # ffplay is shipped as part of FFmpeg, so that is the package to suggest.
+    echo "Error: ffplay is not installed. Please install ffmpeg to play audio files." >&2
+    exit 1
+  fi
+  ffplay "${filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
+fi
+
+echo "Audio file '$filename' generated successfully."