about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rwxr-xr-x tools/speech.sh 98
-rwxr-xr-x tools/stt.sh 121
-rwxr-xr-x tools/tts.sh 115
3 files changed, 98 insertions, 236 deletions
diff --git a/tools/speech.sh b/tools/speech.sh
new file mode 100755
index 0000000..22b5929
--- /dev/null
+++ b/tools/speech.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+# Resolve the directory that holds this script; it is used below to point the
+# user at the ../.env file.
+SCRIPT=$(readlink -f "$0")
+SCRIPTPATH=$(dirname "$SCRIPT")
+
+# External tools that must be on PATH before anything else runs.
+commands=("argc" "curl" "ffplay")
+
+for cmd in "${commands[@]}"; do
+    if ! command -v "$cmd" &>/dev/null; then
+        echo "Error: $cmd is required." >&2
+        exit 1
+    fi
+done
+
+# Check for required environment variable.
+# Diagnostics go to stderr, like the missing-tool message above.
+if [[ -z "${LLM_API_KEY}" ]]; then
+    echo "The environment variable LLM_API_KEY is not set." >&2
+    echo "You can use the following command: 'export \$(xargs < ${SCRIPTPATH}/../.env)'" >&2
+    exit 1
+fi
+
+# Service endpoints; each can be overridden through the environment.
+tts_host=${TTS_API_HOST:-"http://localhost:8000"}
+stt_host=${STT_API_HOST:-"http://localhost:8001"}
+
+_choice_voice() {
+    # Print the voices offered for the language in ${argc_lang}, one per line.
+    # Prints nothing for any language other than "fr" or "en".
+    case "${argc_lang}" in
+        fr)
+            printf '%s\n' siwis tom pierre jessica
+            ;;
+        en)
+            printf '%s\n' alba jack john bryce ryan echo
+            ;;
+    esac
+}
+
+# Escape a string so it can sit inside a JSON string literal.
+# Handles backslash, double quote, newline, carriage return and tab; other
+# control characters are passed through unchanged.
+_json_escape() {
+    local s=$1
+    s=${s//\\/\\\\}
+    s=${s//\"/\\\"}
+    s=${s//$'\n'/\\n}
+    s=${s//$'\r'/\\r}
+    s=${s//$'\t'/\\t}
+    printf '%s' "$s"
+}
+
+# @cmd
+# @flag -p --play Play the generated speech
+# @option -l --lang![en|fr] Set the language
+# @option -v --voice![`_choice_voice`] Set the voice
+# @option -s --speed=1.0 Set the speed
+# @option -f --filename=speech.wav Set the output filename
+# @arg text! Set the text
+synthesize() {
+    # Post ${argc_text} to ${tts_host}/v1/audio/speech, store the response in
+    # ${argc_filename} and, when ${argc_play} is 1, play it with ffplay.
+    # Reads: tts_host, LLM_API_KEY, argc_text, argc_voice, argc_speed,
+    #        argc_filename, argc_play.
+    # Exits 1 when the speed is not numeric or the HTTP status is not 200.
+    local number_re='^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$'
+    local text_json voice_json payload http_status_code
+
+    # The speed is embedded as a bare JSON number, so anything else would
+    # corrupt the request body.
+    if [[ ! "${argc_speed}" =~ $number_re ]]; then
+        echo "Error: Speed must be a number, got: ${argc_speed}" >&2
+        exit 1
+    fi
+
+    # Escape user-supplied strings so quotes, backslashes or newlines in them
+    # cannot break the JSON document.
+    text_json=$(_json_escape "${argc_text}")
+    voice_json=$(_json_escape "${argc_voice}")
+    payload=$(printf '{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
+        "${text_json}" "${voice_json}" "${argc_speed}")
+
+    http_status_code=$(curl -s "${tts_host}/v1/audio/speech" -o "${argc_filename}" -w "%{http_code}" \
+        -H "Authorization: Bearer ${LLM_API_KEY}" \
+        -H "Content-Type: application/json" \
+        -d "${payload}")
+
+    # Check the response code for successful HTTP request
+    if [[ "${http_status_code}" -ne 200 ]]; then
+        echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
+        exit 1
+    fi
+
+    if [[ "${argc_play}" -eq 1 ]]; then
+        ffplay "${argc_filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
+    fi
+    echo "Audio file ${argc_filename} generated successfully."
+}
+
+_choice_source() {
+    # Print the second whitespace-separated field of every line reported by
+    # `pactl list short sources`, one per line.
+    awk '{ print $2 }' < <(pactl list short sources)
+}
+
+_choice_lang() {
+    # Print every language code accepted for transcription, one per line and
+    # without duplicates.
+    printf '%s\n' \
+        af am ar as az ba be bg bn bo \
+        br bs ca cs cy da de el en es \
+        et eu fa fi fo fr gl gu ha haw \
+        he hi hr ht hu hy id is it ja \
+        jw ka kk km kn ko la lb ln lo \
+        lt lv mg mi mk ml mn mr ms mt \
+        my ne nl nn no oc pa pl ps pt \
+        ro ru sa sd si sk sl sn so sq \
+        sr su sv sw ta te tg th tk tl \
+        tr tt uk ur uz vi yi yo yue zh
+}
+
+# @cmd
+# @option -s --source![`_choice_source`] Set the audio source
+# @option -f --filename=record.wav Set the output filename
+record() {
+    # Capture audio from ${argc_source} into ${argc_filename} in WAV format
+    # using parec.
+    # parec gives no sign that it is running, so say how to stop it; the hint
+    # goes to stderr to keep stdout clean.
+    echo "Start recording to ${argc_filename} ; use CTRL+C to terminate." >&2
+    parec -d "${argc_source}" --file-format=wav "${argc_filename}"
+}
+
+# @cmd
+# @option -l --lang![`_choice_lang`] Set the language
+# @option -f --filename! Set the audio file to transcribe
+transcript() {
+    # Upload ${argc_filename} to ${stt_host}/v1/audio/transcriptions with the
+    # language ${argc_lang} and print the streamed response.
+    # Exits 1 when the file does not exist.
+    if [[ ! -f "${argc_filename}" ]]; then
+        echo "Error: File '${argc_filename}' does not exist." >&2
+        exit 1
+    fi
+
+    # Transcribe the specified file
+    echo "Transcribing file ${argc_filename}, be patient"
+    curl "${stt_host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
+        -F "file=@${argc_filename}" \
+        -F "stream=true" \
+        -F "language=${argc_lang}"
+    # End the output with a newline.
+    echo
+
+}
+
+# Hand the script path and the command-line arguments to argc and evaluate the
+# shell code it prints (presumably the option parsing and subcommand dispatch
+# derived from the comment tags in this file; confirm against the argc docs).
+eval "$(argc --argc-eval "$0" "$@")"
diff --git a/tools/stt.sh b/tools/stt.sh
deleted file mode 100755
index 13a1b5a..0000000
--- a/tools/stt.sh
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/bin/bash
-
-# Function to print usage information
-usage() {
- echo "Usage: $0 [record|transcription] <options>"
- echo ""
- echo "Actions:"
- echo " record Record audio from a selected source"
- echo " transcription Transcribe audio from a .wav file"
- echo ""
- echo "Options for 'record':"
- echo " -s, --source Specify the audio source (required)"
- echo ""
- echo "Options for 'transcription':"
- echo " -f, --file Specify the audio file to transcribe (required)"
- echo " -l, --lang Specify the audio file language (default: en)"
- exit 1
-}
-
-if [[ $# -eq 0 ]]; then
- usage
-fi
-
-# Check for required environment variable
-if [[ -z "${LLM_API_KEY}" ]]; then
- echo "The environment variable LLM_API_KEY is not set."
- echo 'You can use the following command: export $(xargs < ../.env))'
- exit 1
-fi
-
-ACTION=$1
-shift
-
-host=${STT_API_HOST:-"http://localhost:8001"}
-LANG="en" # Default language
-
-if [ "$ACTION" == "record" ]; then
- if [ "$#" -eq 0 ]; then
- echo "Error: Source is required for record action."
- echo "Available sources:"
- pactl list short sources | awk '{print $2}'
- exit 1
- fi
-
- SOURCE=""
- while [[ "$#" -gt 0 ]]; do
- case $1 in
- -s | --source)
- SOURCE="$2"
- shift
- ;;
- *)
- echo "Unknown parameter passed: $1"
- usage
- ;;
- esac
- shift
- done
-
- # Validate the provided source
- if ! pactl list short sources | awk '{print $2}' | grep -q "^$SOURCE$"; then
- echo "Error: Invalid audio source. Available sources:"
- pactl list short sources | awk '{print $2}'
- exit 1
- fi
-
- timestamp=$(date +"%Y%m%d_%H%M%S")
- filename="record_${timestamp}.wav"
- echo "Start recording to ${filename} ; use CTRL+C to terminate."
- parec -d "${SOURCE}" --file-format=wav "${filename}"
-elif [ "$ACTION" == "transcription" ]; then
- if [ "$#" -eq 0 ]; then
- echo "Error: File is required for transcription action."
- usage
- fi
-
- FILE=""
- while [[ "$#" -gt 0 ]]; do
- case $1 in
- -f | --file)
- FILE="$2"
- shift
- ;;
- -l | --lang)
- LANG="$2"
- shift
- ;;
- *)
- echo "Unknown parameter passed: $1"
- usage
- ;;
- esac
- shift
- done
-
- if [ -z "$FILE" ]; then
- echo "Error: File is required for transcription action."
- usage
- fi
-
- # Check if the file exists
- if [ ! -f "$FILE" ]; then
- echo "Error: File '$FILE' does not exist."
- exit 1
- fi
-
- # Ensure that curl is available
- if ! command -v curl &>/dev/null; then
- echo "curl is required for transcription but could not be found on your system. Please install it."
- exit 1
- fi
-
- # Transcribe the specified file
- echo "Transcribing file $FILE, be patient"
- curl "${host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
- -F "file=@${FILE}" \
- -F "stream=true" \
- -F "language=${LANG}"
-else
- usage
-fi
diff --git a/tools/tts.sh b/tools/tts.sh
deleted file mode 100755
index 2065a3d..0000000
--- a/tools/tts.sh
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/bin/bash
-
-# Function to display usage information
-usage() {
- echo "Usage: $0 -l <lang> -v <voice> -s <speed> [--play] \"<text>\""
- echo " -l|--lang : Specify the language (french|english)"
- echo " -v|--voice : Specify the voice"
- echo " -s|--speed : Specify the speed (0.0 > 3.0, default is 1.0)"
- echo " --play : Play the generated audio file using ffplay"
- echo " <text> : The text to synthesize"
- exit 1
-}
-
-# Function to check if a value is a valid float between 0 and 3.0
-is_valid_float() {
- local value=$1
- # Check if the value is a valid number
- if [[ $value =~ ^-?[0-9]+(\.[0-9]+)?$ ]]; then
- # Check if the value is between 0 and 3.0
- if (($(echo "$value >= 0" | bc -l))) && (($(echo "$value <= 3.0" | bc -l))); then
- return 0
- fi
- fi
- return 1
-}
-
-# Check for required environment variable
-if [[ -z "${LLM_API_KEY}" ]]; then
- echo "The environment variable LLM_API_KEY is not set."
- echo 'You can use the following command: export $(xargs < ../.env))'
- exit 1
-fi
-
-# Default values
-speed=1.0
-host=${TTS_API_HOST:-"http://localhost:8000"}
-play_audio=false
-
-# Parse command line arguments
-while [[ $# -gt 0 ]]; do
- case $1 in
- -l | --lang)
- lang="$2"
- shift 2
- ;;
- -v | --voice)
- voice="$2"
- shift 2
- ;;
- -s | --speed)
- speed="$2"
- shift 2
- ;;
- --play)
- play_audio=true
- shift 1
- ;;
- -h | --help)
- usage
- ;;
- -* | --*)
- echo "Unknown option $1"
- usage
- ;;
- *)
- break
- ;;
- esac
-done
-
-# Optionally grab the text after the options
-if [[ $# -gt 0 ]]; then
- text="$*"
-else
- echo "Error: Text to synthesize is required."
- usage
-fi
-
-# Generate a timestamp
-timestamp=$(date +"%Y%m%d_%H%M%S")
-
-# Construct the filename with the current date and time
-filename="speech_${timestamp}.wav"
-
-# Validate language and voice options
-if [[ -z "$lang" || -z "$voice" ]]; then
- echo "Error: Language (-l) and voice (-v) options are required."
- usage
-fi
-
-# Check if the speed is valid
-if ! is_valid_float "$speed"; then
- echo "Error: Speed must be a float between 0.0 and 3.0."
- exit 1
-fi
-
-# Fetch the audio file from the API
-http_status_code=$(curl -s "${host}/v1/audio/speech" -o "${filename}" -w "%{http_code}" -H "Authorization: Bearer ${LLM_API_KEY}" -H "Content-Type: application/json" -d "{\"model\": \"tts-1\",\"input\": \"${text}\",\"voice\": \"${voice}\",\"response_format\": \"wav\",\"speed\": ${speed}}")
-
-# Check the response code for successful HTTP request
-if [[ "$http_status_code" -ne 200 ]]; then
- echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code"
- exit 1
-fi
-
-# Optionally play the generated WAV file with ffplay
-if [ "$play_audio" = true ]; then
- if ! command -v ffplay &>/dev/null; then
- echo "Error: ffplay is not installed. Please install mpv to play audio files."
- exit 1
- fi
- ffplay ${filename} -nodisp -nostats -hide_banner -autoexit -v quiet
-fi
-
-echo "Audio file '$filename' generated successfully."