tools/speech.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

#!/bin/bash

# Resolve the real path of this script (symlinks followed) so the .env hint
# below can name the file one directory above the script's own directory.
SCRIPT=$(readlink -f "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

# External tools that must be on PATH before anything else runs.
commands=("argc" "curl" "ffplay")

for cmd in "${commands[@]}"; do
	if ! command -v "$cmd" &>/dev/null; then
		echo "Error: $cmd is required." >&2
		exit 1
	fi
done

# Check for required environment variable.
# The diagnostics go to stderr, like the dependency errors above, so they are
# never mixed into the script's regular stdout output.
if [[ -z "${LLM_API_KEY}" ]]; then
	{
		echo "The environment variable LLM_API_KEY is not set."
		echo "You can use the following command: 'export \$(xargs < ${SCRIPTPATH}/../.env)'"
	} >&2
	exit 1
fi

# Base URLs of the two HTTP services: tts_host is used by synthesize,
# stt_host by transcript. TTS_API_HOST / STT_API_HOST override the local
# defaults when they are set to a non-empty value.
tts_host="http://localhost:8000"
if [[ -n "${TTS_API_HOST}" ]]; then
	tts_host="${TTS_API_HOST}"
fi

stt_host="http://localhost:8001"
if [[ -n "${STT_API_HOST}" ]]; then
	stt_host="${STT_API_HOST}"
fi

# Print the voice names offered for the language held in argc_lang, one per
# line. Nothing is printed for any value other than "fr" or "en".
_choice_voice() {
	case "${argc_lang}" in
	fr)
		printf '%s\n' siwis tom pierre jessica
		;;
	en)
		printf '%s\n' alba jack john bryce ryan echo
		;;
	esac
}

# @cmd
# @flag       -p --play  						Play the generated speech
# @option     -l --lang![en|fr]	 			 Set the language
# @option     -v --voice![`_choice_voice`] 	  Set the voice
# @option     -s --speed=1.0 					Set the speed
# @option     -f --filename=speech.wav 		  Set the output filename
# @arg         text!                 			Set the text
synthesize() {
	# POST the text in argc_text to ${tts_host}/v1/audio/speech and store the
	# response body in argc_filename; optionally play it with ffplay.
	#
	# Reads:   argc_text, argc_voice, argc_speed, argc_filename, argc_play,
	#          tts_host, LLM_API_KEY
	# Outputs: a confirmation line on stdout; error messages on stderr
	# Exits:   1 when the speed is not a plain decimal number or when the
	#          HTTP status code is not 200
	local http_status_code payload text voice
	local -r number_re='^[0-9]+([.][0-9]+)?$'

	# The speed is written into the JSON body unquoted, so anything that is
	# not a number would corrupt (or inject into) the request.
	if ! [[ "${argc_speed}" =~ ${number_re} ]]; then
		echo "Error: speed must be a number, got '${argc_speed}'." >&2
		exit 1
	fi

	# Free-form values must be escaped before being placed between JSON
	# double quotes; otherwise a quote, backslash or newline breaks the body.
	text=$(_json_escape "${argc_text}")
	voice=$(_json_escape "${argc_voice}")
	printf -v payload \
		'{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
		"${text}" "${voice}" "${argc_speed}"

	# -o writes the response body to the output file, -w prints only the
	# status code, which is what the command substitution captures.
	http_status_code=$(curl -s "${tts_host}/v1/audio/speech" -o "${argc_filename}" -w "%{http_code}" \
		-H "Authorization: Bearer ${LLM_API_KEY}" \
		-H "Content-Type: application/json" \
		-d "${payload}")

	# Check the response code for successful HTTP request
	if [[ "${http_status_code}" != "200" ]]; then
		echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
		exit 1
	fi

	if [[ "${argc_play}" -eq 1 ]]; then
		ffplay "${argc_filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
	fi
	echo "Audio file ${argc_filename} generated successfully."
}

# Escape a string so it can be embedded inside a double-quoted JSON string.
# Handles backslash, double quote, newline, carriage return and tab; any other
# control character is passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   the escaped text on stdout, without a trailing newline
_json_escape() {
	local raw=$1
	# Backslashes first, so the escapes added below are not doubled again.
	raw=${raw//\\/\\\\}
	raw=${raw//\"/\\\"}
	raw=${raw//$'\n'/\\n}
	raw=${raw//$'\r'/\\r}
	raw=${raw//$'\t'/\\t}
	printf '%s' "${raw}"
}

# Print the second whitespace-separated column of every line reported by
# `pactl list short sources`, one value per line.
_choice_source() {
	awk '{ print $2 }' < <(pactl list short sources)
}

# Print every language code offered for the transcript command's --lang
# option, one code per line.
_choice_lang() {
	local -a codes=(
		af am ar as az ba be bg bn bo
		br bs ca cs cy da de el en es
		et eu fa fi fo fr gl gu ha haw
		he hi hr ht hu hy id is it ja
		jw ka kk km kn ko la lb ln lo
		lt lv mg mi mk ml mn mr ms mt
		my ne nl nn no oc pa pl ps pt
		ro ru sa sd si sk sl sn so sq
		sr su sv sw ta te tg th tk tl
		tr tt uk ur uz vi yi yo yue zh
	)
	printf '%s\n' "${codes[@]}"
}

# @cmd
# @option     -s --source![`_choice_source`] 	 Set the recording source
# @option     -f --filename=record.wav 		  Set the output filename
record() {
	# Record from the source named in argc_source with parec and write the
	# audio in WAV format to argc_filename.
	#
	# parec is not part of the start-up dependency list, so check for it here
	# and fail with the same message format as the other missing tools.
	if ! command -v parec &>/dev/null; then
		echo "Error: parec is required." >&2
		exit 1
	fi

	parec -d "${argc_source}" --file-format=wav "${argc_filename}"
}

# @cmd
# @option     -l --lang![`_choice_lang`]				 	Set the language
# @option     -f --filename!		 		Set the audio file to transcribe
transcript() {
	# Upload the audio file argc_filename to ${stt_host}/v1/audio/transcriptions
	# as a multipart form (with stream=true and language=argc_lang) and let
	# curl print the response on stdout.
	#
	# Reads: argc_filename, argc_lang, stt_host, LLM_API_KEY
	# Exits: 1 when the file cannot be read or when curl reports a failure
	local curl_status=0

	# Fail early with a clear message instead of letting curl complain.
	if [[ ! -r "${argc_filename}" ]]; then
		echo "Error: cannot read audio file '${argc_filename}'." >&2
		exit 1
	fi

	# Transcribe the specified file
	echo "Transcribing file ${argc_filename}, be patient"
	curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
		-F "file=@${argc_filename}" \
		-F "stream=true" \
		-F "language=${argc_lang}" || curl_status=$?
	# Terminate the response with a newline.
	echo

	# Without this check the trailing echo would hide a failed request.
	if (( curl_status != 0 )); then
		echo "Error: transcription request failed (curl exit code ${curl_status})." >&2
		exit 1
	fi
}

# Entry point: pass this script's path and the caller's arguments to
# `argc --argc-eval` and evaluate the shell code it prints.
# NOTE(review): per argc's documented usage, the generated code is expected to
# read the comment tags in this file, populate the argc_* variables and invoke
# the selected command function — confirm against the installed argc version.
eval "$(argc --argc-eval "$0" "$@")"