tools/speech.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

#!/bin/bash

# Absolute path of this script (symlinks resolved) and the directory holding
# it; later code builds paths relative to that directory.
SCRIPT="$(readlink -f "$0")"
SCRIPTPATH="$(dirname "$SCRIPT")"

# External programs that must be reachable through PATH.
commands=("argc" "curl" "ffplay")

# Stop at the first program that cannot be found.
for cmd in "${commands[@]}"; do
	command -v "$cmd" &>/dev/null && continue
	echo "Error: $cmd is required." >&2
	exit 1
done

# Ensure LLM_API_KEY is available. When it is missing from the environment,
# fall back to the KEY=VALUE pairs stored in the .env file located one
# directory above this script.
env_file="${SCRIPTPATH}/../.env"
if [[ -z "${LLM_API_KEY:-}" ]]; then
	# Diagnostics go to stderr so stdout only ever carries a command's real
	# output (for example the lists printed by the _choice_* helpers).
	echo "The environment variable LLM_API_KEY is not set." >&2
	echo "Try to load ${env_file} file" >&2
	if [[ -r "${env_file}" ]]; then
		# grep drops blank and comment lines; xargs removes shell-style quoting
		# and prints one entry per line, so a quoted value containing spaces is
		# kept as a single assignment.
		while IFS= read -r env_pair; do
			# Export only well-formed NAME=VALUE entries.
			if [[ "${env_pair}" =~ ^[A-Za-z_][A-Za-z0-9_]*= ]]; then
				export "${env_pair}"
			fi
		done < <(grep -Ev '^[[:space:]]*(#|$)' "${env_file}" | xargs -n 1)
	fi
fi
if [[ -z "${LLM_API_KEY:-}" ]]; then
	echo "Failed to load ${env_file} file" >&2
	exit 1
fi

# Base URLs of the speech synthesis (tts_host) and transcription (stt_host)
# services. TTS_API_HOST / STT_API_HOST take precedence when set and
# non-empty; otherwise the localhost defaults below are used.
tts_host="${TTS_API_HOST:-}"
stt_host="${STT_API_HOST:-}"
[[ -n "${tts_host}" ]] || tts_host="http://localhost:8000"
[[ -n "${stt_host}" ]] || stt_host="http://localhost:8001"

# Print the voice names offered for the language held in argc_lang, one per
# line. Only "fr" and "en" have voices; any other value prints nothing.
_choice_voice() {
	case "${argc_lang}" in
	fr)
		printf '%s\n' siwis tom pierre jessica
		;;
	en)
		printf '%s\n' alba jack john bryce ryan echo
		;;
	esac
}

# @cmd
# @flag       -p --play                                                 Play the generated speech
# @option     -l --lang![en|fr]                          Set the language
# @option     -v --voice![`_choice_voice`]        Set the voice
# @option     -s --speed=1.0                                    Set the speed
# @option     -f --filename=speech.wav            Set the output filename
# @arg         text!                                    Set the text
synthesize() {
	# Ask the TTS service at ${tts_host} to speak argc_text and store the WAV
	# response in argc_filename; optionally play it afterwards.
	# Reads:   argc_text, argc_voice, argc_speed, argc_filename, argc_play,
	#          tts_host, LLM_API_KEY
	# Exits 1 when the speed is not a number or the HTTP status is not 200.
	local payload http_status_code
	local number_re='^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'

	# The speed is written into the JSON body as a bare number, so anything
	# that is not a number would corrupt the request.
	if [[ ! "${argc_speed}" =~ ${number_re} ]]; then
		echo "Error: speed must be a number, got: ${argc_speed}" >&2
		exit 1
	fi

	# Escape user-supplied strings so quotes, backslashes or newlines in the
	# text cannot break (or inject keys into) the JSON document.
	printf -v payload \
		'{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
		"$(_json_escape "${argc_text}")" \
		"$(_json_escape "${argc_voice}")" \
		"${argc_speed}"

	# -o stores the response body in the output file; -w prints only the HTTP
	# status code, which is what the command substitution captures.
	http_status_code=$(curl -s "${tts_host}/v1/audio/speech" -o "${argc_filename}" -w "%{http_code}" \
		-H "Authorization: Bearer ${LLM_API_KEY}" \
		-H "Content-Type: application/json" \
		-d "${payload}")

	# Check the response code for successful HTTP request
	if [[ "${http_status_code}" != "200" ]]; then
		echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
		exit 1
	fi

	if [[ $argc_play -eq 1 ]]; then
		ffplay "${argc_filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
	fi
	echo "Audio file ${argc_filename} generated successfully."
}

# Print $1 with the characters that are special inside a JSON string
# (backslash, double quote and the common control characters) escaped.
_json_escape() {
	local escaped=$1
	# Backslashes first, so the escapes added below are not doubled again.
	escaped=${escaped//\\/\\\\}
	escaped=${escaped//\"/\\\"}
	escaped=${escaped//$'\n'/\\n}
	escaped=${escaped//$'\r'/\\r}
	escaped=${escaped//$'\t'/\\t}
	escaped=${escaped//$'\b'/\\b}
	escaped=${escaped//$'\f'/\\f}
	printf '%s' "${escaped}"
}

# Print the second whitespace-separated column of every line reported by
# `pactl list short sources`, one value per line.
_choice_source() {
	local _index source_name _rest
	# The trailing test keeps a final line that lacks a newline terminator.
	while read -r _index source_name _rest || [[ -n "${_index}" ]]; do
		printf '%s\n' "${source_name}"
	done < <(pactl list short sources)
}

# Print the language codes offered for the transcript command's --lang
# option, one per line. printf emits each code on its own line directly, so
# no sed post-processing (and no reliance on "\n" being understood in a sed
# replacement) is needed.
_choice_lang() {
	printf '%s\n' \
		af am ar as az \
		ba be bg bn bo br bs \
		ca cs cy \
		da de \
		el en es et eu \
		fa fi fo fr \
		gl gu \
		ha haw he hi hr ht hu hy \
		id is it \
		ja jw \
		ka kk km kn ko \
		la lb ln lo lt lv \
		mg mi mk ml mn mr ms mt my \
		ne nl nn no \
		oc \
		pa pl ps pt \
		ro ru \
		sa sd si sk sl sn so sq sr su sv sw \
		ta te tg th tk tl tr tt \
		uk ur uz \
		vi \
		yi yo yue \
		zh
}

# @cmd
# @option     -s --source![`_choice_source`]     Set the audio source
# @option     -f --filename=record.wav            Set the output filename
record() {
	# Run parec against the device named in argc_source, asking for WAV
	# output written to argc_filename. The exit status is parec's.
	local -a parec_args=(
		-d "${argc_source}"
		--file-format=wav
		"${argc_filename}"
	)
	parec "${parec_args[@]}"
}

# @cmd
# @option     -l --lang![`_choice_lang`]                                        Set the language
# @option     -f --filename!                            Set the input filename
transcript() {
	# Upload argc_filename to the transcription endpoint at ${stt_host} and
	# print the service's response on stdout.
	# Reads:  argc_filename, argc_lang, stt_host, LLM_API_KEY
	# Exits 1 when the file cannot be read, or with curl's exit code when the
	# request itself fails.
	local curl_status

	# Fail early with a clear message. "-" is let through because curl treats
	# "@-" as "read the upload from stdin".
	if [[ "${argc_filename}" != "-" && ! -r "${argc_filename}" ]]; then
		echo "Error: cannot read file ${argc_filename}" >&2
		exit 1
	fi

	# Transcribe the specified file
	echo "Transcribing file ${argc_filename}, be patient"
	curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
		-F "file=@${argc_filename}" \
		-F "stream=true" \
		-F "language=${argc_lang}"
	# Capture the status now: the echo below would otherwise overwrite it.
	curl_status=$?
	echo

	if [[ "${curl_status}" -ne 0 ]]; then
		echo "Error: transcription request failed (curl exit code ${curl_status})" >&2
		exit "${curl_status}"
	fi
}

# Entry point: argc receives this script's path and the caller's arguments,
# and the shell code it prints is evaluated in the current shell.
# NOTE(review): presumably that generated code parses the comment tags above
# and calls the selected command function; confirm against the argc docs.
eval "$(argc --argc-eval "$0" "$@")"