mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Docker化初步版本完成
This commit is contained in:
parent
866cde6f8b
commit
d96b7d65ec
43
Dockerfile
43
Dockerfile
@ -1,6 +1,11 @@
|
|||||||
# Base CUDA image
|
# Base CUDA image
|
||||||
FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
|
FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
|
||||||
|
|
||||||
|
LABEL maintainer="breakstring@hotmail.com"
|
||||||
|
LABEL version="dev-20240123.03"
|
||||||
|
LABEL description="Docker image for GPT-SoVITS"
|
||||||
|
|
||||||
|
|
||||||
# Install 3rd party apps
|
# Install 3rd party apps
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV TZ=Etc/UTC
|
ENV TZ=Etc/UTC
|
||||||
@ -9,33 +14,31 @@ RUN apt-get update && \
|
|||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
git lfs install
|
git lfs install
|
||||||
|
|
||||||
|
|
||||||
# Install python packages
|
|
||||||
WORKDIR /temp
|
|
||||||
COPY ./requirements.txt /temp/requirements.txt
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
|
|
||||||
# Copy application
|
# Copy application
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
COPY . /workspace
|
COPY . /workspace
|
||||||
|
|
||||||
|
|
||||||
# Download models
|
# Download models
|
||||||
RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh
|
RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh
|
||||||
|
|
||||||
# Clone 3rd repos
|
# Packages should be installed from requirements.txt, but because of issues with funasr and modelscope, the dependencies are installed manually below for now
|
||||||
WORKDIR /workspace/tools/damo_asr/models
|
RUN pip install --no-cache-dir torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba psutil PyYAML
|
||||||
RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \
|
# The modelscope and funasr versions are pinned here; the damo_asr models are left for them to download on their own later
|
||||||
(cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull)
|
RUN pip install --no-cache-dir modelscope~=1.10.0 torchaudio sentencepiece funasr~=0.8.7
|
||||||
RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \
|
|
||||||
(cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull)
|
|
||||||
RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \
|
|
||||||
(cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull)
|
|
||||||
|
|
||||||
RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c"
|
# Disabled for now; let the models be downloaded inside the container instead
|
||||||
|
# Clone damo_asr
|
||||||
|
#WORKDIR /workspace/tools/damo_asr/models
|
||||||
|
#RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \
|
||||||
|
# (cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull)
|
||||||
|
#RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \
|
||||||
|
# (cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull)
|
||||||
|
#RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \
|
||||||
|
# (cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull)
|
||||||
|
|
||||||
WORKDIR /workspace
|
#RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c"
|
||||||
|
|
||||||
|
#WORKDIR /workspace
|
||||||
|
|
||||||
EXPOSE 9870
|
EXPOSE 9870
|
||||||
EXPOSE 9871
|
EXPOSE 9871
|
||||||
@ -43,4 +46,8 @@ EXPOSE 9872
|
|||||||
EXPOSE 9873
|
EXPOSE 9873
|
||||||
EXPOSE 9874
|
EXPOSE 9874
|
||||||
|
|
||||||
|
VOLUME /workspace/output
|
||||||
|
VOLUME /workspace/logs
|
||||||
|
VOLUME /workspace/SoVITS_weights
|
||||||
|
|
||||||
CMD ["python", "webui.py"]
|
CMD ["python", "webui.py"]
|
25
README.md
25
README.md
@ -107,6 +107,31 @@ For Chinese ASR (additionally), download models from [Damo ASR Model](https://mo
|
|||||||
For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
|
For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
|
||||||
|
|
||||||
|
|
||||||
|
### Using Docker
|
||||||
|
|
||||||
|
#### docker-compose.yaml configuration
|
||||||
|
|
||||||
|
1. Environment Variables:
|
||||||
|
- is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation.
|
||||||
|
|
||||||
|
2. Volumes configuration: the application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
|
||||||
|
3. shm_size: The default memory available to Docker Desktop on Windows is too small, which can cause abnormal behavior. Adjust according to your own situation.
|
||||||
|
4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances.
|
||||||
|
|
||||||
|
|
||||||
|
#### Running with docker compose
|
||||||
|
```
|
||||||
|
docker compose -f "docker-compose.yaml" up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Running with docker command
|
||||||
|
|
||||||
|
As above, modify the corresponding parameters based on your actual situation, then run the following command:
|
||||||
|
```
|
||||||
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Dataset Format
|
## Dataset Format
|
||||||
|
|
||||||
The TTS annotation .list file format:
|
The TTS annotation .list file format:
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import sys
|
import sys,os
|
||||||
|
|
||||||
|
|
||||||
# 推理用的指定模型
|
# 推理用的指定模型
|
||||||
sovits_path = ""
|
sovits_path = ""
|
||||||
gpt_path = ""
|
gpt_path = ""
|
||||||
is_half = True
|
# Half-precision toggle, overridable through the `is_half` environment variable
# (e.g. is_half=False). The value is parsed as a boolean word rather than passed
# to eval(): eval() raises TypeError on the non-string default when the variable
# is unset, and it would execute arbitrary code taken from the environment.
# Unset -> True; "true"/"1"/"yes"/"on" (case-insensitive) -> True; anything else -> False.
is_half = os.environ.get("is_half", "True").strip().lower() in ("true", "1", "yes", "on")
|
||||||
is_share=False
|
is_share=False
|
||||||
|
|
||||||
cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
||||||
|
31
docker-compose.yaml
Normal file
31
docker-compose.yaml
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
gpt-sovits:
|
||||||
|
image: breakstring/gpt-sovits:dev-20240123.03
|
||||||
|
container_name: gpt-sovits-container
|
||||||
|
environment:
|
||||||
|
- is_half=False
|
||||||
|
volumes:
|
||||||
|
- G:/GPT-SoVITS-DockerTest/output:/workspace/output
|
||||||
|
- G:/GPT-SoVITS-DockerTest/logs:/workspace/logs
|
||||||
|
- G:/GPT-SoVITS-DockerTest/SoVITS_weights:/workspace/SoVITS_weights
|
||||||
|
- G:/GPT-SoVITS-DockerTest/reference:/workspace/reference
|
||||||
|
working_dir: /workspace
|
||||||
|
ports:
|
||||||
|
- "9870:9870"
|
||||||
|
- "9871:9871"
|
||||||
|
- "9872:9872"
|
||||||
|
- "9873:9873"
|
||||||
|
- "9874:9874"
|
||||||
|
shm_size: 16G
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: "all"
|
||||||
|
capabilities: [gpu]
|
||||||
|
stdin_open: true
|
||||||
|
tty: true
|
||||||
|
restart: unless-stopped
|
@ -87,6 +87,32 @@ brew install ffmpeg
|
|||||||
|
|
||||||
下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。
|
下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。
|
||||||
|
|
||||||
|
### 在 Docker 中使用
|
||||||
|
|
||||||
|
#### docker-compose.yaml 设置
|
||||||
|
|
||||||
|
1. 环境变量:
|
||||||
|
- is_half: 半精度/双精度控制。在进行 "SSL extracting" 步骤时如果无法正确生成 4-cnhubert/5-wav32k 目录下的内容时,一般都是它引起的,可以根据实际情况来调整为True或者False。
|
||||||
|
|
||||||
|
2. Volume设置,容器内的应用根目录设置为 /workspace。 默认的 docker-compose.yaml 中列出了一些实际的例子,便于上传/下载内容。
|
||||||
|
3. shm_size:Windows下的Docker Desktop默认可用内存过小,会导致运行异常,根据自己情况酌情设置。
|
||||||
|
4. deploy小节下的gpu相关内容,请根据您的系统和实际情况酌情设置。
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### 通过 docker compose运行
|
||||||
|
```
|
||||||
|
docker compose -f "docker-compose.yaml" up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 通过 docker 命令运行
|
||||||
|
|
||||||
|
同上,根据您自己的实际情况修改对应的参数,然后运行如下命令:
|
||||||
|
```
|
||||||
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### 预训练模型
|
### 预训练模型
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,6 +93,30 @@ brew install ffmpeg
|
|||||||
|
|
||||||
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。
|
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。
|
||||||
|
|
||||||
|
### Dockerの使用
|
||||||
|
|
||||||
|
#### docker-compose.yamlの設定
|
||||||
|
|
||||||
|
1. 環境変数:
|
||||||
|
- `is_half`:半精度/倍精度の制御。"SSL抽出"ステップ中に`4-cnhubert/5-wav32k`ディレクトリ内の内容が正しく生成されない場合、通常これが原因です。実際の状況に応じてTrueまたはFalseに調整してください。
|
||||||
|
|
||||||
|
2. ボリューム設定:コンテナ内のアプリケーションのルートディレクトリは`/workspace`に設定されます。デフォルトの`docker-compose.yaml`には、アップロード/ダウンロードの内容の実例がいくつか記載されています。
|
||||||
|
3. `shm_size`:WindowsのDocker Desktopのデフォルトの利用可能メモリが小さすぎるため、異常な動作を引き起こす可能性があります。状況に応じて適宜設定してください。
|
||||||
|
4. `deploy`セクションのGPUに関連する内容は、システムと実際の状況に応じて慎重に設定してください。
|
||||||
|
|
||||||
|
#### docker composeで実行する
|
||||||
|
```
|
||||||
|
docker compose -f "docker-compose.yaml" up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
#### dockerコマンドで実行する
|
||||||
|
|
||||||
|
上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します:
|
||||||
|
```
|
||||||
|
docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### 事前訓練済みモデル
|
### 事前訓練済みモデル
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user