blob: 2065a3db9f6f854f434ab2864de6411dd375de3d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
#!/bin/bash
# Print the command-line help text on stdout, then terminate the script
# with exit status 1. Never returns to the caller.
usage() {
  # One printf call emits every help line; '%s\n' is reused for each argument.
  printf '%s\n' \
    "Usage: $0 -l <lang> -v <voice> -s <speed> [--play] \"<text>\"" \
    ' -l|--lang : Specify the language (french|english)' \
    ' -v|--voice : Specify the voice' \
    ' -s|--speed : Specify the speed (0.0 > 3.0, default is 1.0)' \
    ' --play : Play the generated audio file using ffplay' \
    ' <text> : The text to synthesize'
  exit 1
}
# Check whether a value is a plain decimal number in the inclusive range
# [0, 3.0].
#
# Implemented with bash string operations only, so it does not depend on
# the external `bc` tool being installed (when `bc` is missing, a
# bc-based comparison silently rejects every value).
#
# Arguments: $1 - candidate value (e.g. "1", "0.75", "3.0")
# Returns:   0 if the value is a number between 0 and 3.0, 1 otherwise
is_valid_float() {
  local value=$1
  local -r number_re='^(-?)([0-9]+)(\.([0-9]+))?$'
  local sign int_part frac_part

  # Accept only an optional sign, digits, and an optional ".digits" part.
  [[ $value =~ $number_re ]] || return 1
  sign=${BASH_REMATCH[1]}
  int_part=${BASH_REMATCH[2]}
  frac_part=${BASH_REMATCH[4]:-}

  # Drop leading zeros from the integer part; an empty result means 0.
  int_part=${int_part#"${int_part%%[!0]*}"}
  # Drop every zero from the fraction; an empty result means it is 0.
  frac_part=${frac_part//0/}

  # A signed value is only in range when it is negative zero ("-0", "-0.0").
  if [[ -n $sign ]]; then
    [[ -z $int_part && -z $frac_part ]]
    return
  fi

  case $int_part in
    '' | 1 | 2)
      # 0.x, 1.x and 2.x are always within range.
      return 0
      ;;
    3)
      # Exactly 3 is the upper bound: any non-zero fraction exceeds it.
      [[ -z $frac_part ]]
      return
      ;;
    *)
      return 1
      ;;
  esac
}
# Main script body: validate the environment, parse the command line,
# request synthesized speech from the TTS API and optionally play it.
#
# Environment:
#   LLM_API_KEY   (required) bearer token sent to the API
#   TTS_API_HOST  (optional) API base URL, default http://localhost:8000

# Escape a string so it can be embedded inside a JSON string literal.
# Handles backslash, double quote and the common control characters.
# Arguments: $1 - raw string
# Outputs:   the escaped string on stdout (no trailing newline)
json_escape() {
  local raw=$1
  raw=${raw//\\/\\\\} # backslashes first, so later escapes are not doubled
  raw=${raw//\"/\\\"}
  raw=${raw//$'\n'/\\n}
  raw=${raw//$'\r'/\\r}
  raw=${raw//$'\t'/\\t}
  raw=${raw//$'\b'/\\b}
  raw=${raw//$'\f'/\\f}
  printf '%s' "$raw"
}

# Abort with the usage text when a value-taking option has no value.
# Without this guard `shift 2` fails when only one positional parameter
# is left, the parameters are not shifted, and the parsing loop never ends.
# Arguments: $1 - number of remaining positional parameters
#            $2 - the option being parsed
require_value() {
  if (($1 < 2)); then
    echo "Error: Option $2 requires a value."
    usage
  fi
}

# Check for required environment variable
if [[ -z "${LLM_API_KEY}" ]]; then
  echo "The environment variable LLM_API_KEY is not set."
  echo 'You can use the following command: export $(xargs < ../.env)'
  exit 1
fi

# Default values
speed=1.0
host=${TTS_API_HOST:-"http://localhost:8000"}
play_audio=false

# Parse command line arguments; stop at the first non-option word.
while [[ $# -gt 0 ]]; do
  case $1 in
    -l | --lang)
      require_value "$#" "$1"
      lang="$2"
      shift 2
      ;;
    -v | --voice)
      require_value "$#" "$1"
      voice="$2"
      shift 2
      ;;
    -s | --speed)
      require_value "$#" "$1"
      speed="$2"
      shift 2
      ;;
    --play)
      play_audio=true
      shift 1
      ;;
    -h | --help)
      usage
      ;;
    -*)
      echo "Unknown option $1"
      usage
      ;;
    *)
      break
      ;;
  esac
done

# Everything left after the options is the text, joined with spaces.
if [[ $# -gt 0 ]]; then
  text="$*"
else
  echo "Error: Text to synthesize is required."
  usage
fi

# Construct the output filename from the current date and time
timestamp=$(date +"%Y%m%d_%H%M%S")
filename="speech_${timestamp}.wav"

# Validate language and voice options
if [[ -z "$lang" || -z "$voice" ]]; then
  echo "Error: Language (-l) and voice (-v) options are required."
  usage
fi

# Check if the speed is valid
if ! is_valid_float "$speed"; then
  echo "Error: Speed must be a float between 0.0 and 3.0."
  exit 1
fi

# Build the JSON request body. The free-form values are escaped so that
# quotes, backslashes or newlines in the text cannot break or alter the JSON.
payload=$(printf '{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
  "$(json_escape "$text")" "$(json_escape "$voice")" "$speed")

# Fetch the audio file from the API; curl writes the response body to the
# file and prints only the HTTP status code, which is captured here.
http_status_code=$(
  curl -s "${host}/v1/audio/speech" \
    -o "${filename}" \
    -w "%{http_code}" \
    -H "Authorization: Bearer ${LLM_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "${payload}"
)

# Check the response code for successful HTTP request
if [[ "$http_status_code" -ne 200 ]]; then
  echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code"
  exit 1
fi

# Optionally play the generated WAV file with ffplay
if [[ "$play_audio" == true ]]; then
  if ! command -v ffplay &>/dev/null; then
    echo "Error: ffplay is not installed. Please install ffmpeg to play audio files."
    exit 1
  fi
  ffplay "${filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
fi

echo "Audio file '$filename' generated successfully."
|