diff options
Diffstat (limited to 'src/tts')
-rw-r--r-- | src/tts/Dockerfile | 62 | ||||
-rw-r--r-- | src/tts/download_voices_tts-1.sh | 15 | ||||
-rw-r--r-- | src/tts/voice_to_speaker.default.yaml | 36 |
3 files changed, 113 insertions, 0 deletions
diff --git a/src/tts/Dockerfile b/src/tts/Dockerfile
new file mode 100644
index 0000000..1636bd2
--- /dev/null
+++ b/src/tts/Dockerfile
@@ -0,0 +1,62 @@
+# syntax=docker/dockerfile:1
+# Image for the openedai-speech text-to-speech server: clones the upstream
+# project and adds this repository's voice download script and voice mapping.
+FROM python:3.11-slim
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install -U pip
+
+# TARGETPLATFORM is one of BuildKit's automatic platform build arguments.
+ARG TARGETPLATFORM
+RUN <<EOF
+# Heredoc lines are not chained with &&, so stop at the first failing command.
+set -e
+apt-get update
+apt-get install --no-install-recommends -y curl ffmpeg git
+if [ "$TARGETPLATFORM" != "linux/amd64" ]; then
+    # Other platforms additionally get a C toolchain and Rust (presumably to build packages without prebuilt wheels).
+    apt-get install --no-install-recommends -y build-essential
+    # Save the installer before running it so a failed download aborts the build instead of piping nothing into sh.
+    curl --proto '=https' --tlsv1.2 -sSf -o /tmp/rustup-init.sh https://sh.rustup.rs
+    sh /tmp/rustup-init.sh -y
+    rm /tmp/rustup-init.sh
+fi
+
+# for deepspeed support - image +7.5GB, over the 10GB ghcr.io limit, and no noticeable gain in speed or VRAM usage?
+#curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
+#dpkg -i cuda-keyring_1.1-1_all.deb
+#rm cuda-keyring_1.1-1_all.deb
+#apt-get install --no-install-recommends -y libaio-dev build-essential cuda-toolkit
+
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+EOF
+#ENV CUDA_HOME=/usr/local/cuda
+# rustup installs into /root/.cargo/bin; the entry is inert on linux/amd64 where rustup is skipped.
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+WORKDIR /app
+RUN mkdir -p voices config
+
+# USE_ROCM=1 switches the Python dependencies to requirements-rocm.txt below.
+ARG USE_ROCM
+ENV USE_ROCM=${USE_ROCM}
+
+# Clone, move and delete in one layer so the temporary checkout does not stay in the image.
+RUN git clone --depth 1 https://github.com/matatonic/openedai-speech.git /tmp/app \
+    && mv /tmp/app/* /app/ \
+    && rm -rf /tmp/app
+COPY src/tts/download_voices_tts-1.sh /app/download_voices_tts-1.sh
+COPY src/tts/voice_to_speaker.default.yaml /app/voice_to_speaker.default.yaml
+RUN if [ "${USE_ROCM}" = "1" ]; then mv /app/requirements-rocm.txt /app/requirements.txt; fi
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
+
+
+ARG PRELOAD_MODEL
+ENV PRELOAD_MODEL=${PRELOAD_MODEL}
+ENV TTS_HOME=voices
+ENV HF_HOME=voices
+ENV COQUI_TOS_AGREED=1
+
+# Exec form: bash runs as PID 1 and receives the stop signal from `docker stop` directly.
+CMD ["bash", "startup.sh"]
+
diff --git a/src/tts/download_voices_tts-1.sh b/src/tts/download_voices_tts-1.sh
new file mode 100644
index 0000000..f880650
--- /dev/null
+++ b/src/tts/download_voices_tts-1.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Download the piper voice models for the tts-1 voices into ./voices, skipping files already present.
+# Usage: download_voices_tts-1.sh [model ...]  (without arguments the default list below is used)
+# The default list appears to have been generated with:
+# cat voice_to_speaker.default.yaml | yq '.tts-1 ' | grep mode | cut -d'/' -f2 | cut -d'.' -f1 | sort -u | xargs
+models=${*:-"en_GB-alba-medium en_GB-northern_english_male-medium en_US-bryce-medium en_US-john-medium en_US-libritts_r-medium en_US-ryan-high fr_FR-siwis-medium fr_FR-tom-medium fr_FR-upmc-medium"}
+# "--model x" looks like a placeholder used only to trigger --update-voices (TODO confirm); stderr is discarded.
+piper --update-voices --data-dir voices --download-dir voices --model x 2> /dev/null
+for i in $models ; do
+    # if/fi instead of `[ ... ] && ...`: an already-downloaded last model must not make the script exit non-zero.
+    if [ ! -e "voices/$i.onnx" ]; then
+        piper --data-dir voices --download-dir voices --model "$i" < /dev/null > /dev/null
+    fi
+done
+
diff --git a/src/tts/voice_to_speaker.default.yaml b/src/tts/voice_to_speaker.default.yaml
new file mode 100644
index 0000000..53acda6
--- /dev/null
+++ b/src/tts/voice_to_speaker.default.yaml
@@ -0,0 +1,36 @@
+# Use https://rhasspy.github.io/piper-samples/ to configure
+tts-1:
+  alloy:
+    model: voices/en_US-libritts_r-medium.onnx
+    speaker: 79
+  siwis:
+    model: voices/fr_FR-siwis-medium.onnx
+    speaker: 0
+  tom:
+    model: voices/fr_FR-tom-medium.onnx
+    speaker: 0
+  pierre:
+    model: voices/fr_FR-upmc-medium.onnx
+    speaker: 1
+  jessica:
+    model: voices/fr_FR-upmc-medium.onnx
+    speaker: 0
+  alba:
+    model: voices/en_GB-alba-medium.onnx
+    speaker: 0
+  jack:
+    model: voices/en_GB-northern_english_male-medium.onnx
+    speaker: 0
+  john:
+    model: voices/en_US-john-medium.onnx
+    speaker: 0
+  bryce:
+    model: voices/en_US-bryce-medium.onnx
+    speaker: 0
+  ryan:
+    model: voices/en_US-ryan-high.onnx
+    speaker: 0
+  echo:
+    model: voices/en_US-libritts_r-medium.onnx
+    speaker: 134
+