diff --git a/.vscode/settings.json b/.vscode/settings.json index 89a552b..bf99ca8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,10 +5,12 @@ "cudnn", "cufft", "curand", + "espeak", "manylinux", "nvrtc", "onnxruntime", "phonemize", + "rhasspy", "setuptools", "slackr", "venv", diff --git a/Dockerfile b/Dockerfile index 4a18c33..9de4ede 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,8 +4,9 @@ WORKDIR /usr/src ARG TARGETARCH=amd64 ARG TARGETVARIANT= -ARG WYOMING_PIPER_VERSION='1.4.0' -ARG PIPER_RELEASE='1.2.0' +ARG WYOMING_PIPER_VERSION="1.4.0" +ARG PIPER_RELEASE="1.2.0" +ARG PIPER_URL="https://github.com/rhasspy/piper/releases/download/v${PIPER_RELEASE}/piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" ENV DEBIAN_FRONTEND=noninteractive @@ -21,57 +22,46 @@ RUN \ python3-pip RUN \ - mkdir -p /data /app/tests &&\ + mkdir -p /data /app &&\ python3 -m venv /app &&\ . /app/bin/activate &&\ /app/bin/python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel &&\ - /app/bin/python3 -m pip install --no-cache-dir \ - torch \ - py-cpuinfo \ - psutil - # tensorflow[and-cuda] \ + /app/bin/python3 -m pip install --no-cache-dir torch RUN \ . /app/bin/activate && \ - /app/bin/python3 -m pip install --no-cache-dir --force-reinstall --no-deps\ - "piper-tts==${PIPER_RELEASE}" \ - &&\ - \ - /app/bin/python3 -m pip install --no-cache-dir \ - piper_phonemize==1.1.0 \ + /app/bin/python3 -m pip install --no-cache-dir --no-deps\ + "piper-tts==${PIPER_RELEASE}"\ &&\ \ /app/bin/python3 -m pip install --no-cache-dir\ - onnxruntime-gpu \ + piper_phonemize==1.1.0\ + &&\ + \ + /app/bin/python3 -m pip install --no-cache-dir\ + onnxruntime-gpu\ &&\ \ /app/bin/python3 -m pip install --no-cache-dir\ "wyoming-piper==${WYOMING_PIPER_VERSION}"\ &&\ \ - wget \ - "https://github.com/rhasspy/piper/releases/download/v${PIPER_RELEASE}/piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" -O -|tar -zxvf - -C /usr/share + wget "${PIPER_URL}" -O -|tar -zxvf - -C /app - # pip3 install --no-cache-dir --force-reinstall --no-deps\ - # piper_phonemize-1.1.0-py3-none-any.whl &&\ - # \ - # "wyoming-piper @ https://github.com/rhasspy/wyoming-piper/archive/refs/tags/v${WYOMING_PIPER_VERSION}.tar.gz" - # wget https://github.com/rhasspy/piper-phonemize/releases/download/v1.1.0/piper_phonemize-1.1.0-cp310-cp310-manylinux_2_28_x86_64.whl &&\ - # mv piper_phonemize-1.1.0-cp310-cp310-manylinux_2_28_x86_64.whl piper_phonemize-1.1.0-py3-none-any.whl &&\ - # rm -r piper_phonemize-1.1.0-py3-none-any.whl &&\ # Patch to enable CUDA arguments for piper -COPY patch/wyoming-piper_cuda.patch /tmp/ +COPY patches/* /tmp/ RUN \ cd /app/lib/python3.10/site-packages/wyoming_piper/;\ - patch -p0 --forward < /tmp/wyoming-piper_cuda.patch || true + for file in /tmp/wyoming_piper*.diff;do patch -p0 --forward < $file;done;\ + cd /app/lib/python3.10/site-packages/piper/;\ + for file in /tmp/piper*.diff;do patch -p0 --forward < $file;done;\ + true # Clean up RUN \ rm -rf /root/.cache/pip /var/lib/apt/lists/* /tmp/* -COPY tests/* /app/tests/ - WORKDIR /app COPY run.sh /app/ RUN chmod +x /app/run.sh diff --git a/README.md b/README.md index 8bfaaad..e80bd27 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,9 @@ https://github.com/rhasspy/wyoming-piper [![Publish Docker image](https://github.com/slackr31337/wyoming-piper-gpu/actions/workflows/docker-image.yml/badge.svg)](https://github.com/slackr31337/wyoming-piper-gpu/actions/workflows/docker-image.yml) +# Working +docker pull ghcr.io/slackr31337/wyoming-piper-gpu:v2023.12.0 - +# Latest docker pull ghcr.io/slackr31337/wyoming-piper-gpu:latest diff --git a/patch/wyoming-piper_cuda.patch b/patch/wyoming-piper_cuda.patch deleted file mode 100644 index 438615c..0000000 --- a/patch/wyoming-piper_cuda.patch +++ /dev/null @@ -1,35 +0,0 @@ ---- __main__.py 2025-01-04 17:48:27.645862671 +0000 -+++ __main__.py.patched 2025-01-04 17:51:50.193357609 +0000 -@@ -66,6 +66,15 @@ - help="Download latest voices.json during startup", - ) - # -+ parser.add_argument( -+ "--cuda", -+ action="store_true", -+ help="Use GPU" -+ ) -+ parser.add_argument( -+ "--espeak-data-dir", -+ help="Path to espeak-ng data directory" -+ ) - parser.add_argument("--debug", action="store_true", help="Log DEBUG messages") - args = parser.parse_args() - - ---- process.py 2025-01-04 17:48:27.653862810 +0000 -+++ process.py.patched 2025-01-04 17:52:48.494362993 +0000 -@@ -148,6 +148,14 @@ - if self.args.noise_w: - piper_args.extend(["--noise-w", str(self.args.noise_w)]) - -+ if self.args.espeak_data_dir: -+ piper_args.extend(["--espeak_data", str(self.args.espeak_data_dir)]) -+ -+ if self.args.cuda: -+ piper_args.extend(["--debug"]) -+ piper_args.extend(["--cuda"]) -+ - _LOGGER.debug( - "Starting piper process: %s args=%s", self.args.piper, piper_args - ) diff --git a/patches/piper_voice.diff b/patches/piper_voice.diff new file mode 100644 index 0000000..10affe8 --- /dev/null +++ b/patches/piper_voice.diff @@ -0,0 +1,11 @@ +--- voice.py 2025-01-04 19:49:49.000000000 +0000 ++++ voice.patched 2025-01-04 21:54:21.970873501 +0000 +@@ -41,7 +41,7 @@ + sess_options=onnxruntime.SessionOptions(), + providers=["CPUExecutionProvider"] + if not use_cuda +- else ["CUDAExecutionProvider"], ++ else [("CUDAExecutionProvider", {"cudnn_conv_algo_search": "HEURISTIC"})], + ), + ) + diff --git a/patches/wyoming_piper__main__.diff b/patches/wyoming_piper__main__.diff new file mode 100644 index 0000000..18bde80 --- /dev/null +++ b/patches/wyoming_piper__main__.diff @@ -0,0 +1,19 @@ +--- __main__.py 2025-01-04 21:57:16.229874190 +0000 ++++ __main__.patched 2025-01-04 21:56:57.917558487 +0000 +@@ -66,6 +66,16 @@ + help="Download latest voices.json during startup", + ) + # ++ parser.add_argument( ++ "--cuda", ++ action="store_true", ++ help="Use GPU" ++ ) ++ parser.add_argument( ++ "--espeak-data-dir", ++ help="Path to espeak-ng data directory" ++ ) ++ # + parser.add_argument("--debug", action="store_true", help="Log DEBUG messages") + args = parser.parse_args() + diff --git a/patches/wyoming_piper_handler.diff b/patches/wyoming_piper_handler.diff new file mode 100644 index 0000000..0b72558 --- /dev/null +++ b/patches/wyoming_piper_handler.diff @@ -0,0 +1,10 @@ +--- handler.py 2025-01-04 19:50:00.000000000 +0000 ++++ handler.py.patched 2025-01-04 20:58:52.201781636 +0000 +@@ -88,7 +88,7 @@ + + _LOGGER.debug("input: %s", input_obj) + piper_proc.proc.stdin.write( +- (json.dumps(input_obj, ensure_ascii=False) + "\n").encode() ++ (f"{text}\n").encode() + ) + await piper_proc.proc.stdin.drain() diff --git a/patches/wyoming_piper_process.diff b/patches/wyoming_piper_process.diff new file mode 100644 index 0000000..a0f7bda --- /dev/null +++ b/patches/wyoming_piper_process.diff @@ -0,0 +1,23 @@ +--- process.py 2025-01-04 19:50:00.000000000 +0000 ++++ process.py.patched 2025-01-04 20:58:10.813174885 +0000 +@@ -130,7 +130,6 @@ + str(config_path), + "--output_dir", + str(wav_dir.name), +- "--json-input", # piper 1.1+ + ] + + if voice_speaker is not None: +@@ -148,6 +147,12 @@ + if self.args.noise_w: + piper_args.extend(["--noise-w", str(self.args.noise_w)]) + ++ if self.args.espeak_data_dir: ++ piper_args.extend(["--espeak_data", str(self.args.espeak_data_dir)]) ++ ++ if self.args.cuda: ++ piper_args.extend(["--cuda"]) ++ + _LOGGER.debug( + "Starting piper process: %s args=%s", self.args.piper, piper_args + ) diff --git a/run.sh b/run.sh index 2044515..2808d7b 100644 --- a/run.sh +++ b/run.sh @@ -4,9 +4,10 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/app/lib/python3.10/site-packages/nvidia # Run wyoming-piper server source /app/bin/activate + /app/bin/python3 -m wyoming_piper \ --piper '/app/bin/piper' \ - --cuda \ --uri 'tcp://0.0.0.0:10200' \ --data-dir /data \ - --download-dir /data --debug "$@" + --download-dir /data \ + --cuda "$@"