From ccbc93f4fc9f0d25c843ae93fda5cfb0f8ff151e Mon Sep 17 00:00:00 2001
From: Kenn Zhang
Date: Mon, 22 Jan 2024 20:21:18 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E7=89=88=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Docker/damo.sha256  |  3 +++
 Docker/download.sh  | 11 +++++++++++
 Docker/links.sha256 | 12 ++++++++++++
 Docker/links.txt    | 34 ++++++++++++++++++++++++++++++++++
 Dockerfile          | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 106 insertions(+)
 create mode 100644 Docker/damo.sha256
 create mode 100644 Docker/download.sh
 create mode 100644 Docker/links.sha256
 create mode 100644 Docker/links.txt
 create mode 100644 Dockerfile

diff --git a/Docker/damo.sha256 b/Docker/damo.sha256
new file mode 100644
index 0000000..6e9804d
--- /dev/null
+++ b/Docker/damo.sha256
@@ -0,0 +1,3 @@
+5bba782a5e9196166233b9ab12ba04cadff9ef9212b4ff6153ed9290ff679025 /workspace/tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb
+b3be75be477f0780277f3bae0fe489f48718f585f3a6e45d7dd1fbb1a4255fc5 /workspace/tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch/model.pb
+a5818bb9d933805a916eebe41eb41648f7f9caad30b4bd59d56f3ca135421916 /workspace/tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/model.pb
\ No newline at end of file
diff --git a/Docker/download.sh b/Docker/download.sh
new file mode 100644
index 0000000..447e018
--- /dev/null
+++ b/Docker/download.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+set -Eeuo pipefail
+
+echo "Downloading models..."
+
+aria2c --disable-ipv6 --input-file /workspace/Docker/links.txt --dir /workspace --continue
+
+echo "Checking SHA256..."
+
+parallel --will-cite -a /workspace/Docker/links.sha256 "echo -n {} | sha256sum -c"
diff --git a/Docker/links.sha256 b/Docker/links.sha256
new file mode 100644
index 0000000..cda6dc1
--- /dev/null
+++ b/Docker/links.sha256
@@ -0,0 +1,12 @@
+b1c1e17e9c99547a89388f72048cd6e1b41b5a18b170e86a46dfde0324d63eb1 /workspace/GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
+fc579c1db3c1e21b721001cf99d7a584214280df19b002e200b630a34fa06eb8 /workspace/GPT_SoVITS/pretrained_models/s2D488k.pth
+020a014e1e01e550e510f2f61fae5e5f5b6aab40f15c22f1f12f724df507e835 /workspace/GPT_SoVITS/pretrained_models/s2G488k.pth
+24164f129c66499d1346e2aa55f183250c223161ec2770c0da3d3b08cf432d3c /workspace/GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin
+e53a693acc59ace251d143d068096ae0d7b79e4b1b503fa84c9dcf576448c1d8 /workspace/GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
+39796caa5db18d7f9382d8ac997ac967bfd85f7761014bb807d2543cc844ef05 /workspace/tools/uvr5/uvr5_weights/HP2_all_vocals.pth
+45e6b65199e781b4a6542002699be9f19cd3d1cb7d1558bc2bfbcd84674dfe28 /workspace/tools/uvr5/uvr5_weights/HP3_all_vocals.pth
+5908891829634926119720241e8573d97cbeb8277110a7512bdb0bd7563258ee /workspace/tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth
+8c8fd1582f9aabc363e47af62ddb88df6cae7e064cae75bbf041a067a5e0aee2 /workspace/tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth
+01376dd2a571bf3cb9cced680732726d2d732609d09216a610b0d110f133febe /workspace/tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth
+56aba59db3bcdd14a14464e62f3129698ecdea62eee0f003b9360923eb3ac79e /workspace/tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth
+233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80 /workspace/tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
\ No newline at end of file
diff --git a/Docker/links.txt b/Docker/links.txt
new file mode 100644
index 0000000..e6603db
--- /dev/null
+++ b/Docker/links.txt
@@ -0,0 +1,34 @@
+# GPT-SoVITS models
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s1bert25hz-2kh-longer-epoch%3D68e-step%3D50232.ckpt
+  out=GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2D488k.pth
+  out=GPT_SoVITS/pretrained_models/s2D488k.pth
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2G488k.pth
+  out=GPT_SoVITS/pretrained_models/s2G488k.pth
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/config.json
+  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/config.json
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/preprocessor_config.json
+  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/preprocessor_config.json
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/pytorch_model.bin
+  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/config.json
+  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/config.json
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/pytorch_model.bin
+  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
+https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/tokenizer.json
+  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/tokenizer.json
+# UVR5
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth
+  out=tools/uvr5/uvr5_weights/HP2_all_vocals.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth
+  out=tools/uvr5/uvr5_weights/HP3_all_vocals.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth
+  out=tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth
+  out=tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth
+  out=tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth
+  out=tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth
+https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
+  out=tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d39bf21
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,46 @@
+# Base CUDA image
+FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
+
+# Install OS packages; DEBIAN_FRONTEND is a build-only ARG so it is not baked into the runtime environment
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Etc/UTC
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && \
+    rm -rf /var/lib/apt/lists/* && \
+    git lfs install
+
+# Install Python packages before copying the sources so this layer stays
+# cached when only the application code changes
+WORKDIR /temp
+COPY ./requirements.txt /temp/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+
+# Copy application
+WORKDIR /workspace
+COPY . /workspace
+
+# Download the files listed in Docker/links.txt and check them against
+# Docker/links.sha256 (both steps are performed by Docker/download.sh)
+RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh
+
+# Clone the model repositories from ModelScope and pull their Git LFS objects
+WORKDIR /workspace/tools/damo_asr/models
+RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \
+    git -C speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch lfs pull
+RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \
+    git -C speech_fsmn_vad_zh-cn-16k-common-pytorch lfs pull
+RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \
+    git -C punc_ct-transformer_zh-cn-common-vocab272727-pytorch lfs pull
+# Verify the cloned model files against the digests in Docker/damo.sha256
+RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c"
+
+WORKDIR /workspace
+
+EXPOSE 9870
+EXPOSE 9871
+EXPOSE 9872
+EXPOSE 9873
+EXPOSE 9874
+
+CMD ["python", "webui.py"]
\ No newline at end of file