diff options
author | ben | 2025-01-12 14:37:13 +0100 |
---|---|---|
committer | ben | 2025-01-12 14:37:13 +0100 |
commit | 778188ed95ccf50d2e21938bf5b542d76e066f63 (patch) | |
tree | e5138e638da98036e03cb11b2b0cf48fe4c590b2 /tools | |
download | ai_env-778188ed95ccf50d2e21938bf5b542d76e066f63.tar.gz ai_env-778188ed95ccf50d2e21938bf5b542d76e066f63.tar.bz2 ai_env-778188ed95ccf50d2e21938bf5b542d76e066f63.tar.xz |
Initial commit, first public version.
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/aichat | bin | 0 -> 12073152 bytes | |||
-rwxr-xr-x | tools/stt.sh | 121 | ||||
-rwxr-xr-x | tools/tts.sh | 115 |
3 files changed, 236 insertions, 0 deletions
# ---------------------------------------------------------------------------
# Files added under tools/ by this commit (all new, mode 100755):
#   tools/aichat  binary file (original blob ff31ede), no text content
#   tools/stt.sh  shell script (original blob 13a1b5a), reviewed copy below
#   tools/tts.sh  shell script (original blob 2065a3d), reviewed copy below
# The two scripts below are reviewed versions; line counts in the diffstat
# refer to the originals.
# ---------------------------------------------------------------------------

# ===========================================================================
# tools/stt.sh
# ===========================================================================
#!/bin/bash
#
# Record audio with parec, or upload an audio file to a transcription
# endpoint with curl.
#
# Usage:
#   stt.sh record -s <source>
#   stt.sh transcription -f <file> [-l <lang>]
#
# Environment:
#   LLM_API_KEY   required; sent as "Authorization: Bearer <key>"
#   STT_API_HOST  optional; API base URL (default: http://localhost:8001)

# Print usage information to stderr and exit with status 1.
usage() {
  {
    echo "Usage: $0 [record|transcription] <options>"
    echo ""
    echo "Actions:"
    echo "  record          Record audio from a selected source"
    echo "  transcription   Transcribe audio from a .wav file"
    echo ""
    echo "Options for 'record':"
    echo "  -s, --source    Specify the audio source (required)"
    echo ""
    echo "Options for 'transcription':"
    echo "  -f, --file      Specify the audio file to transcribe (required)"
    echo "  -l, --lang      Specify the audio file language (default: en)"
  } >&2
  exit 1
}

# Print the second column of `pactl list short sources` (the source names),
# one per line.
list_sources() {
  pactl list short sources | awk '{print $2}'
}

# Exit with an error unless a command is installed.
#   $1 - command name
#   $2 - the action that needs it (used in the message only)
require_command() {
  if ! command -v "$1" &>/dev/null; then
    echo "$1 is required for $2 but could not be found on your system. Please install it." >&2
    exit 1
  fi
}

if [[ $# -eq 0 ]]; then
  usage
fi

# Check for required environment variable
if [[ -z "${LLM_API_KEY}" ]]; then
  echo "The environment variable LLM_API_KEY is not set." >&2
  echo 'You can use the following command: export $(xargs < ../.env)' >&2
  exit 1
fi

ACTION=$1
shift

host=${STT_API_HOST:-"http://localhost:8001"}
# Deliberately not named LANG: assigning to LANG would overwrite the locale
# variable that every child process (curl, date, parec) inherits.
lang="en" # Default language

if [[ "$ACTION" == "record" ]]; then
  require_command pactl recording
  require_command parec recording

  if [[ $# -eq 0 ]]; then
    echo "Error: Source is required for record action." >&2
    echo "Available sources:" >&2
    list_sources >&2
    exit 1
  fi

  SOURCE=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -s | --source)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires a value." >&2
          usage
        fi
        SOURCE="$2"
        shift 2
        ;;
      *)
        echo "Unknown parameter passed: $1" >&2
        usage
        ;;
    esac
  done

  # Validate the provided source. -F/-x compare the whole line literally, so
  # dots and other regex metacharacters in device names are not wildcards.
  if [[ -z "$SOURCE" ]] || ! list_sources | grep -Fxq -- "$SOURCE"; then
    echo "Error: Invalid audio source. Available sources:" >&2
    list_sources >&2
    exit 1
  fi

  timestamp=$(date +"%Y%m%d_%H%M%S")
  filename="record_${timestamp}.wav"
  echo "Start recording to ${filename} ; use CTRL+C to terminate."
  parec -d "${SOURCE}" --file-format=wav "${filename}"
elif [[ "$ACTION" == "transcription" ]]; then
  if [[ $# -eq 0 ]]; then
    echo "Error: File is required for transcription action." >&2
    usage
  fi

  FILE=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -f | --file | -l | --lang)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires a value." >&2
          usage
        fi
        case "$1" in
          -f | --file) FILE="$2" ;;
          -l | --lang) lang="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown parameter passed: $1" >&2
        usage
        ;;
    esac
  done

  if [[ -z "$FILE" ]]; then
    echo "Error: File is required for transcription action." >&2
    usage
  fi

  # Check if the file exists
  if [[ ! -f "$FILE" ]]; then
    echo "Error: File '$FILE' does not exist." >&2
    exit 1
  fi

  # Ensure that curl is available
  require_command curl transcription

  # Transcribe the specified file. The progress message goes to stderr so
  # that stdout carries only the API response.
  echo "Transcribing file $FILE, be patient" >&2
  # --form-string sends the value literally; with -F a language value that
  # starts with '@' or '<' would make curl read a local file instead.
  curl "${host}/v1/audio/transcriptions" -H "Authorization: Bearer ${LLM_API_KEY}" \
    -F "file=@${FILE}" \
    -F "stream=true" \
    --form-string "language=${lang}"
else
  usage
fi

# ===========================================================================
# tools/tts.sh
# ===========================================================================
#!/bin/bash
#
# Request synthesized speech from an HTTP endpoint with curl, save it as
# speech_<timestamp>.wav in the current directory and optionally play it.
#
# Usage:
#   tts.sh -l <lang> -v <voice> [-s <speed>] [--play] "<text>"
#
# Environment:
#   LLM_API_KEY   required; sent as "Authorization: Bearer <key>"
#   TTS_API_HOST  optional; API base URL (default: http://localhost:8000)

# Print usage information to stderr and exit with status 1.
usage() {
  {
    echo "Usage: $0 -l <lang> -v <voice> -s <speed> [--play] \"<text>\""
    echo "  -l|--lang     : Specify the language (french|english)"
    echo "  -v|--voice    : Specify the voice"
    echo "  -s|--speed    : Specify the speed (0.0 to 3.0, default is 1.0)"
    echo "  --play        : Play the generated audio file using ffplay"
    echo "  <text>        : The text to synthesize"
  } >&2
  exit 1
}

# Return 0 if $1 is a decimal number (digits, optional ".digits", optional
# leading "-") whose value lies between 0 and 3.0 inclusive, 1 otherwise.
# Implemented with string operations only, so it needs no external tool and
# cannot overflow on very long digit strings.
is_valid_float() {
  local value=$1
  local number_re='^(-?)([0-9]+)(\.([0-9]+))?$'
  [[ $value =~ $number_re ]] || return 1

  local sign=${BASH_REMATCH[1]}
  local int_part=${BASH_REMATCH[2]}
  local frac_part=${BASH_REMATCH[4]}

  # A negative number is in range only when it is a spelling of zero.
  if [[ -n $sign ]]; then
    [[ -z ${int_part//0/}${frac_part//0/} ]]
    return
  fi

  # Drop leading zeros so that "007" is compared as "7".
  while [[ $int_part == 0?* ]]; do
    int_part=${int_part#0}
  done

  case $int_part in
    0 | 1 | 2) return 0 ;;
    # Exactly 3 is the upper bound: only an all-zero fraction is allowed.
    3) [[ -z ${frac_part//0/} ]] ;;
    *) return 1 ;;
  esac
}

# Print $1 escaped for use inside a double-quoted JSON string: backslash,
# double quote, newline, carriage return and tab are escaped. Other control
# characters are passed through unchanged.
json_escape() {
  local s=$1
  local backslash='\'
  # Backslashes first, otherwise the escapes added below would be doubled.
  s=${s//"$backslash"/"$backslash$backslash"}
  s=${s//'"'/'\"'}
  s=${s//$'\n'/'\n'}
  s=${s//$'\r'/'\r'}
  s=${s//$'\t'/'\t'}
  printf '%s' "$s"
}

# Check for required environment variable
if [[ -z "${LLM_API_KEY}" ]]; then
  echo "The environment variable LLM_API_KEY is not set." >&2
  echo 'You can use the following command: export $(xargs < ../.env)' >&2
  exit 1
fi

# Default values
speed=1.0
host=${TTS_API_HOST:-"http://localhost:8000"}
play_audio=false

# Parse command line arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    -l | --lang | -v | --voice | -s | --speed)
      # "shift 2" with a single argument left fails without shifting, which
      # would make this loop spin forever; reject the missing value instead.
      if [[ $# -lt 2 ]]; then
        echo "Error: Option $1 requires a value." >&2
        usage
      fi
      case "$1" in
        -l | --lang) lang="$2" ;;
        -v | --voice) voice="$2" ;;
        -s | --speed) speed="$2" ;;
      esac
      shift 2
      ;;
    --play)
      play_audio=true
      shift 1
      ;;
    -h | --help)
      usage
      ;;
    -*)
      echo "Unknown option $1" >&2
      usage
      ;;
    *)
      # First non-option argument: everything from here on is the text.
      break
      ;;
  esac
done

# Everything left after the options is the text, joined with single spaces
if [[ $# -gt 0 ]]; then
  text="$*"
else
  echo "Error: Text to synthesize is required." >&2
  usage
fi

# Construct the filename with the current date and time
timestamp=$(date +"%Y%m%d_%H%M%S")
filename="speech_${timestamp}.wav"

# Validate language and voice options
# NOTE(review): lang is required here but is not part of the request payload
# built below - confirm whether the API is expected to receive it.
if [[ -z "$lang" || -z "$voice" ]]; then
  echo "Error: Language (-l) and voice (-v) options are required." >&2
  usage
fi

# Check if the speed is valid
if ! is_valid_float "$speed"; then
  echo "Error: Speed must be a float between 0.0 and 3.0." >&2
  exit 1
fi

# Ensure that curl is available
if ! command -v curl &>/dev/null; then
  echo "curl is required for speech synthesis but could not be found on your system. Please install it." >&2
  exit 1
fi

# Build the JSON body. Text and voice come from the command line, so they are
# escaped; speed was validated as a plain number above.
payload=$(printf '{"model": "tts-1","input": "%s","voice": "%s","response_format": "wav","speed": %s}' \
  "$(json_escape "$text")" "$(json_escape "$voice")" "$speed")

# Fetch the audio file from the API
http_status_code=$(curl -s "${host}/v1/audio/speech" -o "${filename}" -w "%{http_code}" \
  -H "Authorization: Bearer ${LLM_API_KEY}" \
  -H "Content-Type: application/json" \
  -d "${payload}")

# Check the response code for successful HTTP request
if [[ "$http_status_code" != "200" ]]; then
  echo "Error: Failed to fetch audio file. Received HTTP status code: $http_status_code" >&2
  # On failure the output file holds the server's response body rather than
  # audio: show it for diagnosis and do not leave it behind as a .wav.
  if [[ -s "$filename" ]]; then
    cat -- "$filename" >&2
    echo >&2
  fi
  rm -f -- "$filename"
  exit 1
fi

# Optionally play the generated WAV file with ffplay
if [[ "$play_audio" == true ]]; then
  if ! command -v ffplay &>/dev/null; then
    echo "Error: ffplay is not installed. Please install ffmpeg to play audio files." >&2
    exit 1
  fi
  ffplay "${filename}" -nodisp -nostats -hide_banner -autoexit -v quiet
fi

echo "Audio file '$filename' generated successfully."