Merge pull request #164 from breakstring/main

Docker support
2025-12-04 00:02:17 +08:00 · 2024-01-23 17:50:21 +08:00 · 2024-01-23 17:50:21 +08:00 · 2552a55873
commit 2552a55873
parent 252cb3799f 012440bc0e
10 changed files with 221 additions and 2 deletions
--- a/Docker/damo.sha256
+++ b/Docker/damo.sha256
@ -0,0 +1,3 @@
 5bba782a5e9196166233b9ab12ba04cadff9ef9212b4ff6153ed9290ff679025 /workspace/tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb
 b3be75be477f0780277f3bae0fe489f48718f585f3a6e45d7dd1fbb1a4255fc5 /workspace/tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch/model.pb
 a5818bb9d933805a916eebe41eb41648f7f9caad30b4bd59d56f3ca135421916 /workspace/tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/model.pb
--- a/Docker/download.sh
+++ b/Docker/download.sh
@ -0,0 +1,11 @@
 #!/usr/bin/env bash
 # Fetch every model listed in links.txt into /workspace, then check the
 # downloaded files against the checksums recorded in links.sha256.
 # Any failing command aborts the script (errexit, nounset, pipefail, errtrace).
 set -Eeuo pipefail
 docker_dir=/workspace/Docker
 echo "Downloading models..."
 # --continue resumes partially downloaded files instead of starting over.
 aria2c \
     --disable-ipv6 \
     --continue \
     --dir /workspace \
     --input-file "${docker_dir}/links.txt"
 echo "Checking SHA256..."
 # Each line of the checksum list is handed to its own `sha256sum -c` job.
 parallel --will-cite --arg-file "${docker_dir}/links.sha256" "echo -n {} | sha256sum -c"
--- a/Docker/links.sha256
+++ b/Docker/links.sha256
@ -0,0 +1,12 @@
 b1c1e17e9c99547a89388f72048cd6e1b41b5a18b170e86a46dfde0324d63eb1 /workspace/GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
 fc579c1db3c1e21b721001cf99d7a584214280df19b002e200b630a34fa06eb8 /workspace/GPT_SoVITS/pretrained_models/s2D488k.pth
 020a014e1e01e550e510f2f61fae5e5f5b6aab40f15c22f1f12f724df507e835 /workspace/GPT_SoVITS/pretrained_models/s2G488k.pth
 24164f129c66499d1346e2aa55f183250c223161ec2770c0da3d3b08cf432d3c /workspace/GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin
 e53a693acc59ace251d143d068096ae0d7b79e4b1b503fa84c9dcf576448c1d8 /workspace/GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
 39796caa5db18d7f9382d8ac997ac967bfd85f7761014bb807d2543cc844ef05 /workspace/tools/uvr5/uvr5_weights/HP2_all_vocals.pth
 45e6b65199e781b4a6542002699be9f19cd3d1cb7d1558bc2bfbcd84674dfe28 /workspace/tools/uvr5/uvr5_weights/HP3_all_vocals.pth
 5908891829634926119720241e8573d97cbeb8277110a7512bdb0bd7563258ee /workspace/tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth
 8c8fd1582f9aabc363e47af62ddb88df6cae7e064cae75bbf041a067a5e0aee2 /workspace/tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth
 01376dd2a571bf3cb9cced680732726d2d732609d09216a610b0d110f133febe /workspace/tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth
 56aba59db3bcdd14a14464e62f3129698ecdea62eee0f003b9360923eb3ac79e /workspace/tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth
 233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80 /workspace/tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
--- a/Docker/links.txt
+++ b/Docker/links.txt
@ -0,0 +1,34 @@
 # GPT-SoVITS models
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s1bert25hz-2kh-longer-epoch%3D68e-step%3D50232.ckpt
  out=GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2D488k.pth
  out=GPT_SoVITS/pretrained_models/s2D488k.pth
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2G488k.pth
  out=GPT_SoVITS/pretrained_models/s2G488k.pth
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/config.json
  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/config.json
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/preprocessor_config.json
  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/preprocessor_config.json
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/pytorch_model.bin
  out=GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/config.json
  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/config.json
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/pytorch_model.bin
  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin
 https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/tokenizer.json
  out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/tokenizer.json
 # UVR5
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth
  out=tools/uvr5/uvr5_weights/HP2_all_vocals.pth
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth
  out=tools/uvr5/uvr5_weights/HP3_all_vocals.pth
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth
  out=tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth
  out=tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth
  out=tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth
  out=tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth
 https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
  out=tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
--- a/53
+++ b/53
@ -0,0 +1,53 @@
 # Base CUDA image
 FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
 LABEL maintainer="breakstring@hotmail.com" \
       version="dev-20240123.03" \
       description="Docker image for GPT-SoVITS"
 # Install 3rd party apps.
 # DEBIAN_FRONTEND is a build-time ARG: it silences apt prompts while building
 # without being baked into the environment of the running container.
 ARG DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/UTC
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        aria2 \
        ffmpeg \
        git \
        git-lfs \
        libsox-dev \
        parallel \
        tzdata && \
    rm -rf /var/lib/apt/lists/* && \
    git lfs install
 WORKDIR /workspace
 # Python dependencies are installed before any application file is copied, so
 # that editing the source does not invalidate these layers.
 # Packages should be installed from requirements.txt, but because of issues with
 # funasr and modelscope the dependencies are installed manually here for now.
 RUN pip install --no-cache-dir torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba psutil PyYAML
 # The modelscope and funasr versions are pinned here; the damo_asr models are
 # left for them to download on their own later.
 RUN pip install --no-cache-dir modelscope~=1.10.0 torchaudio sentencepiece funasr~=0.8.7
 # Download models.
 # Only the Docker/ helper files are copied first, so the large download layer is
 # rebuilt only when the download script or its link/checksum lists change.
 COPY Docker/ /workspace/Docker/
 RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh
 # Copy application.
 # COPY merges into the existing /workspace tree, so the downloaded models stay
 # in place unless the build context ships a file at the same path.
 COPY . /workspace
 # Disabled for now; let the container download these models by itself.
 # Clone damo_asr
 #WORKDIR /workspace/tools/damo_asr/models
 #RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \
 #    (cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull)
 #RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \
 #    (cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull)
 #RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \
 #    (cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull)
 #RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c"
 #WORKDIR /workspace
 # Ports documented for the web UI and its helper apps (EXPOSE does not publish them).
 EXPOSE 9870 9871 9872 9873 9874
 VOLUME /workspace/output
 VOLUME /workspace/logs
 VOLUME /workspace/SoVITS_weights
 CMD ["python", "webui.py"]
--- a/README.md
+++ b/README.md
@ -107,6 +107,31 @@ For Chinese ASR (additionally), download models from [Damo ASR Model](https://mo
 For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
 ### Using Docker
 #### docker-compose.yaml configuration
 1. Environment variables:
  - is_half: Controls half-precision/double-precision. If the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step, this setting is typically the cause. Set it to True or False based on your actual situation.
 2. Volumes configuration: the application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
 3. shm_size: The default available memory for Docker Desktop on Windows is too small, which can cause abnormal operations. Adjust according to your own situation.
 4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances.
 #### Running with docker compose
 ```
 docker compose -f "docker-compose.yaml" up -d
 ```
 #### Running with docker command
 As above, modify the corresponding parameters based on your actual situation, then run the following command:
 ```
 docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
 ```
 ## Dataset Format
 The TTS annotation .list file format:
--- a/config.py
+++ b/config.py
@ -1,10 +1,10 @@
-import sys
+import sys,os
 # 推理用的指定模型
 sovits_path = ""
 gpt_path = ""
-is_half = True
+is_half = eval(os.environ.get("is_half",True))
 is_share=False
 cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -0,0 +1,31 @@
 # Compose definition for running the GPT-SoVITS image with NVIDIA GPU access.
 version: "3.8"
 services:
  gpt-sovits:
    image: breakstring/gpt-sovits:dev-20240123.03
    container_name: gpt-sovits-container
    working_dir: /workspace
    # Passed to the container as the is_half environment variable.
    environment:
      is_half: "False"
    # Host directories (example Windows paths) mapped into /workspace.
    volumes:
      - G:/GPT-SoVITS-DockerTest/output:/workspace/output
      - G:/GPT-SoVITS-DockerTest/logs:/workspace/logs
      - G:/GPT-SoVITS-DockerTest/SoVITS_weights:/workspace/SoVITS_weights
      - G:/GPT-SoVITS-DockerTest/reference:/workspace/reference
    ports:
      - "9870:9870"
      - "9871:9871"
      - "9872:9872"
      - "9873:9873"
      - "9874:9874"
    shm_size: 16G
    stdin_open: true
    tty: true
    restart: unless-stopped
    # Reserve every NVIDIA GPU of the host for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@ -87,6 +87,32 @@ brew install ffmpeg
 下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。
 ### 在 Docker 中使用
 #### docker-compose.yaml 设置
 1. 环境变量：
  - is_half: 半精度/双精度控制。在进行 "SSL extracting" 步骤时如果无法正确生成 4-cnhubert/5-wav32k 目录下的内容时，一般都是它引起的，可以根据实际情况来调整为True或者False。
 2. Volume设置，容器内的应用根目录设置为 /workspace。 默认的 docker-compose.yaml 中列出了一些实际的例子，便于上传/下载内容。
 3. shm_size：Windows下的Docker Desktop默认可用内存过小，会导致运行异常，根据自己情况酌情设置。
 4. deploy小节下的gpu相关内容，请根据您的系统和实际情况酌情设置。
 #### 通过 docker compose运行
 ```
 docker compose -f "docker-compose.yaml" up -d
 ```
 #### 通过 docker 命令运行
 同上，根据您自己的实际情况修改对应的参数，然后运行如下命令：
 ```
 docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
 ```
 ### 预训练模型
--- a/docs/ja/README.md
+++ b/docs/ja/README.md
@ -93,6 +93,30 @@ brew install ffmpeg
 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。
 ### Dockerの使用
 #### docker-compose.yamlの設定
 1. 環境変数：
    - `is_half`：半精度／倍精度の制御。"SSL抽出"ステップ中に`4-cnhubert/5-wav32k`ディレクトリ内の内容が正しく生成されない場合、通常これが原因です。実際の状況に応じてTrueまたはFalseに調整してください。
 2. ボリューム設定：コンテナ内のアプリケーションのルートディレクトリは`/workspace`に設定されます。デフォルトの`docker-compose.yaml`には、アップロード／ダウンロードの内容の実例がいくつか記載されています。
 3. `shm_size`：WindowsのDocker Desktopのデフォルトの利用可能メモリが小さすぎるため、異常な動作を引き起こす可能性があります。状況に応じて適宜設定してください。
 4. `deploy`セクションのGPUに関連する内容は、システムと実際の状況に応じて慎重に設定してください。
 #### docker composeで実行する
 ```
 docker compose -f "docker-compose.yaml" up -d
 ```
 #### dockerコマンドで実行する
 上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します：
 ```
 docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
 ```
 ### 事前訓練済みモデル