From ccbc93f4fc9f0d25c843ae93fda5cfb0f8ff151e Mon Sep 17 00:00:00 2001 From: Kenn Zhang Date: Mon, 22 Jan 2024 20:21:18 +0800 Subject: [PATCH 01/16] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Docker/damo.sha256 | 3 +++ Docker/download.sh | 11 +++++++++++ Docker/links.sha256 | 12 ++++++++++++ Docker/links.txt | 34 +++++++++++++++++++++++++++++++++ Dockerfile | 46 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+) create mode 100644 Docker/damo.sha256 create mode 100644 Docker/download.sh create mode 100644 Docker/links.sha256 create mode 100644 Docker/links.txt create mode 100644 Dockerfile diff --git a/Docker/damo.sha256 b/Docker/damo.sha256 new file mode 100644 index 00000000..6e9804da --- /dev/null +++ b/Docker/damo.sha256 @@ -0,0 +1,3 @@ +5bba782a5e9196166233b9ab12ba04cadff9ef9212b4ff6153ed9290ff679025 /workspace/tools/damo_asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.pb +b3be75be477f0780277f3bae0fe489f48718f585f3a6e45d7dd1fbb1a4255fc5 /workspace/tools/damo_asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch/model.pb +a5818bb9d933805a916eebe41eb41648f7f9caad30b4bd59d56f3ca135421916 /workspace/tools/damo_asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/model.pb \ No newline at end of file diff --git a/Docker/download.sh b/Docker/download.sh new file mode 100644 index 00000000..447e018e --- /dev/null +++ b/Docker/download.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +echo "Downloading models..." + +aria2c --disable-ipv6 --input-file /workspace/Docker/links.txt --dir /workspace --continue + +echo "Checking SHA256..." + +parallel --will-cite -a /workspace/Docker/links.sha256 "echo -n {} | sha256sum -c" diff --git a/Docker/links.sha256 b/Docker/links.sha256 new file mode 100644 index 00000000..cda6dc15 --- /dev/null +++ b/Docker/links.sha256 @@ -0,0 +1,12 @@ +b1c1e17e9c99547a89388f72048cd6e1b41b5a18b170e86a46dfde0324d63eb1 /workspace/GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt +fc579c1db3c1e21b721001cf99d7a584214280df19b002e200b630a34fa06eb8 /workspace/GPT_SoVITS/pretrained_models/s2D488k.pth +020a014e1e01e550e510f2f61fae5e5f5b6aab40f15c22f1f12f724df507e835 /workspace/GPT_SoVITS/pretrained_models/s2G488k.pth +24164f129c66499d1346e2aa55f183250c223161ec2770c0da3d3b08cf432d3c /workspace/GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin +e53a693acc59ace251d143d068096ae0d7b79e4b1b503fa84c9dcf576448c1d8 /workspace/GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin +39796caa5db18d7f9382d8ac997ac967bfd85f7761014bb807d2543cc844ef05 /workspace/tools/uvr5/uvr5_weights/HP2_all_vocals.pth +45e6b65199e781b4a6542002699be9f19cd3d1cb7d1558bc2bfbcd84674dfe28 /workspace/tools/uvr5/uvr5_weights/HP3_all_vocals.pth +5908891829634926119720241e8573d97cbeb8277110a7512bdb0bd7563258ee /workspace/tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth +8c8fd1582f9aabc363e47af62ddb88df6cae7e064cae75bbf041a067a5e0aee2 /workspace/tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth +01376dd2a571bf3cb9cced680732726d2d732609d09216a610b0d110f133febe /workspace/tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth +56aba59db3bcdd14a14464e62f3129698ecdea62eee0f003b9360923eb3ac79e /workspace/tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth +233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80 /workspace/tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx \ No newline at end of file diff --git a/Docker/links.txt b/Docker/links.txt new file mode 100644 index 00000000..e6603db0 --- /dev/null +++ b/Docker/links.txt @@ -0,0 +1,34 @@ +# GPT-SoVITS models +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s1bert25hz-2kh-longer-epoch%3D68e-step%3D50232.ckpt + out=GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2D488k.pth + out=GPT_SoVITS/pretrained_models/s2D488k.pth +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/s2G488k.pth + out=GPT_SoVITS/pretrained_models/s2G488k.pth +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/config.json + out=GPT_SoVITS/pretrained_models/chinese-hubert-base/config.json +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/preprocessor_config.json + out=GPT_SoVITS/pretrained_models/chinese-hubert-base/preprocessor_config.json +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-hubert-base/pytorch_model.bin + out=GPT_SoVITS/pretrained_models/chinese-hubert-base/pytorch_model.bin +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/config.json + out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/config.json +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/pytorch_model.bin + out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/pytorch_model.bin +https://huggingface.co/lj1995/GPT-SoVITS/resolve/main/chinese-roberta-wwm-ext-large/tokenizer.json + out=GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large/tokenizer.json +# UVR5 +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth + out=tools/uvr5/uvr5_weights/HP2_all_vocals.pth +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth + out=tools/uvr5/uvr5_weights/HP3_all_vocals.pth +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth + out=tools/uvr5/uvr5_weights/HP5_only_main_vocal.pth +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth + out=tools/uvr5/uvr5_weights/VR-DeEchoAggressive.pth +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth + out=tools/uvr5/uvr5_weights/VR-DeEchoDeReverb.pth +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth + out=tools/uvr5/uvr5_weights/VR-DeEchoNormal.pth +https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx + out=tools/uvr5/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..d39bf217 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,46 @@ +# Base CUDA image +FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04 + +# Install 3rd party apps +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=Etc/UTC +RUN apt-get update && \ + apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && \ + rm -rf /var/lib/apt/lists/* && \ + git lfs install + + +# Install python packages +WORKDIR /temp +COPY ./requirements.txt /temp/requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + + +# Copy application +WORKDIR /workspace +COPY . /workspace + + +# Download models +RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh + +# Clone 3rd repos +WORKDIR /workspace/tools/damo_asr/models +RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \ + (cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull) +RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \ + (cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull) +RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \ + (cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull) + +RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c" + +WORKDIR /workspace + +EXPOSE 9870 +EXPOSE 9871 +EXPOSE 9872 +EXPOSE 9873 +EXPOSE 9874 + +CMD ["python", "webui.py"] \ No newline at end of file From 948e7fc086dc68310ff09b2a897233cffb609f1b Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 10:30:49 +0800 Subject: [PATCH 02/16] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index abb61f11..589b0f6b 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -10,7 +10,7 @@ 5-清理TEMP文件夹缓存音频等文件 -6-在参考音频结尾留空0.3s,削弱合成音频包含参考音频结尾的问题 +6-大幅削弱合成音频包含参考音频结尾的问题 ### 20240122更新 From 93c47cd9f0c53439536eada18879b4ec5a812ae1 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 16:59:25 +0800 Subject: [PATCH 03/16] fix nan issue(causing sovits zerodivision) fix nan issue(which will cause sovits zerodivision) --- .../prepare_datasets/2-get-hubert-wav32k.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index a5075ff4..bf3ab49e 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -49,10 +49,13 @@ maxx=0.95 alpha=0.5 device="cuda:0" model=cnhubert.get_model() +# is_half=False if(is_half==True): model=model.half().to(device) else: model = model.to(device) + +nan_fails=[] def name2go(wav_name): hubert_path="%s/%s.pt"%(hubert_dir,wav_name) if(os.path.exists(hubert_path)):return @@ -60,25 +63,27 @@ def name2go(wav_name): tmp_audio = load_audio(wav_path, 32000) tmp_max = np.abs(tmp_audio).max() if tmp_max > 2.2: - print("%s-%s-%s-filtered" % (idx0, idx1, tmp_max)) + print("%s-filtered" % (wav_name, tmp_max)) return tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha*32768)) + ((1 - alpha)*32768) * tmp_audio tmp_audio = librosa.resample( tmp_audio32, orig_sr=32000, target_sr=16000 - ) + )#不是重采样问题 tensor_wav16 = torch.from_numpy(tmp_audio) if (is_half == True): tensor_wav16=tensor_wav16.half().to(device) else: tensor_wav16 = tensor_wav16.to(device) ssl=model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1,2).cpu()#torch.Size([1, 768, 215]) - if np.isnan(ssl.detach().numpy()).sum()!= 0:return + if np.isnan(ssl.detach().numpy()).sum()!= 0: + nan_fails.append(wav_name) + print("nan filtered:%s"%wav_name) + return wavfile.write( "%s/%s"%(wav32dir,wav_name), 32000, tmp_audio32.astype("int16"), ) - # torch.save(ssl,hubert_path ) my_save(ssl,hubert_path ) with open(inp_text,"r",encoding="utf8")as f: @@ -92,3 +97,12 @@ for line in lines[int(i_part)::int(all_parts)]: name2go(wav_name) except: print(line,traceback.format_exc()) + +if(len(nan_fails)>0 and is_half==True): + is_half=False + model=model.float() + for wav_name in nan_fails: + try: + name2go(wav_name) + except: + print(wav_name,traceback.format_exc()) \ No newline at end of file From 252cb3799ffb69ffc2e07dda6cd1c82f0ef7c14c Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:00:03 +0800 Subject: [PATCH 04/16] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 589b0f6b..bbd51f8c 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -20,5 +20,6 @@ 3-音频路径检查。如果尝试读取输入错的路径报错路径不存在,而非ffmpeg错误。 -待修复:-hubert提取在half下出现nan概率更高的问题 +### 20240123更新 +1-hubert提取在half下出现nan概率更高的问题 From d96b7d65ece9a89978dda13d6104d5623b57912e Mon Sep 17 00:00:00 2001 From: Kenn Zhang Date: Tue, 23 Jan 2024 17:00:31 +0800 Subject: [PATCH 05/16] =?UTF-8?q?Docker=E5=8C=96=E5=88=9D=E6=AD=A5?= =?UTF-8?q?=E7=89=88=E6=9C=AC=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 43 +++++++++++++++++++++++++------------------ README.md | 25 +++++++++++++++++++++++++ config.py | 4 ++-- docker-compose.yaml | 31 +++++++++++++++++++++++++++++++ docs/cn/README.md | 26 ++++++++++++++++++++++++++ docs/ja/README.md | 24 ++++++++++++++++++++++++ 6 files changed, 133 insertions(+), 20 deletions(-) create mode 100644 docker-compose.yaml diff --git a/Dockerfile b/Dockerfile index d39bf217..cbf92cb5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,11 @@ # Base CUDA image FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04 +LABEL maintainer="breakstring@hotmail.com" +LABEL version="dev-20240123.03" +LABEL description="Docker image for GPT-SoVITS" + + # Install 3rd party apps ENV DEBIAN_FRONTEND=noninteractive ENV TZ=Etc/UTC @@ -9,33 +14,31 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* && \ git lfs install - -# Install python packages -WORKDIR /temp -COPY ./requirements.txt /temp/requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - - # Copy application WORKDIR /workspace COPY . /workspace - # Download models RUN chmod +x /workspace/Docker/download.sh && /workspace/Docker/download.sh -# Clone 3rd repos -WORKDIR /workspace/tools/damo_asr/models -RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \ - (cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull) -RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \ - (cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull) -RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \ - (cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull) +# 本应该从 requirements.txt 里面安装package,但是由于funasr和modelscope的问题,暂时先在后面手工安装依赖包吧 +RUN pip install --no-cache-dir torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba psutil PyYAML +# 这里强制指定了modelscope和funasr的版本,后面damo_asr的模型让它们自己下载 +RUN pip install --no-cache-dir modelscope~=1.10.0 torchaudio sentencepiece funasr~=0.8.7 -RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c" +# 先屏蔽掉,让容器里自己下载 +# Clone damo_asr +#WORKDIR /workspace/tools/damo_asr/models +#RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && \ +# (cd speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch && git lfs pull) +#RUN git clone --depth 1 https://www.modelscope.cn/iic/speech_fsmn_vad_zh-cn-16k-common-pytorch.git speech_fsmn_vad_zh-cn-16k-common-pytorch && \ +# (cd speech_fsmn_vad_zh-cn-16k-common-pytorch && git lfs pull) +#RUN git clone --depth 1 https://www.modelscope.cn/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git punc_ct-transformer_zh-cn-common-vocab272727-pytorch && \ +# (cd punc_ct-transformer_zh-cn-common-vocab272727-pytorch && git lfs pull) -WORKDIR /workspace +#RUN parallel --will-cite -a /workspace/Docker/damo.sha256 "echo -n {} | sha256sum -c" + +#WORKDIR /workspace EXPOSE 9870 EXPOSE 9871 @@ -43,4 +46,8 @@ EXPOSE 9872 EXPOSE 9873 EXPOSE 9874 +VOLUME /workspace/output +VOLUME /workspace/logs +VOLUME /workspace/SoVITS_weights + CMD ["python", "webui.py"] \ No newline at end of file diff --git a/README.md b/README.md index 7649d7ba..59089eab 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,31 @@ For Chinese ASR (additionally), download models from [Damo ASR Model](https://mo For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`. +### Using Docker + +#### docker-compose.yaml configuration + +1. Environment Variables: + - is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation. + +2. Volumes Configuration,The application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content. +3. shm_size: The default available memory for Docker Desktop on Windows is too small, which can cause abnormal operations. Adjust according to your own situation. +4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances. + + +#### Running with docker compose +``` +docker compose -f "docker-compose.yaml" up -d +``` + +#### Running with docker command + +As above, modify the corresponding parameters based on your actual situation, then run the following command: +``` +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03 +``` + + ## Dataset Format The TTS annotation .list file format: diff --git a/config.py b/config.py index ec846b3c..75db9bc8 100644 --- a/config.py +++ b/config.py @@ -1,10 +1,10 @@ -import sys +import sys,os # 推理用的指定模型 sovits_path = "" gpt_path = "" -is_half = True +is_half = eval(os.environ.get("is_half",True)) is_share=False cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base" diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..a772c823 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,31 @@ +version: '3.8' + +services: + gpt-sovits: + image: breakstring/gpt-sovits:dev-20240123.03 + container_name: gpt-sovits-container + environment: + - is_half=False + volumes: + - G:/GPT-SoVITS-DockerTest/output:/workspace/output + - G:/GPT-SoVITS-DockerTest/logs:/workspace/logs + - G:/GPT-SoVITS-DockerTest/SoVITS_weights:/workspace/SoVITS_weights + - G:/GPT-SoVITS-DockerTest/reference:/workspace/reference + working_dir: /workspace + ports: + - "9870:9870" + - "9871:9871" + - "9872:9872" + - "9873:9873" + - "9874:9874" + shm_size: 16G + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: "all" + capabilities: [gpu] + stdin_open: true + tty: true + restart: unless-stopped diff --git a/docs/cn/README.md b/docs/cn/README.md index 27c56689..072dc0d0 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -87,6 +87,32 @@ brew install ffmpeg 下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。 +### 在 Docker 中使用 + +#### docker-compose.yaml 设置 + +1. 环境变量: + - is_half: 半精度/双精度控制。在进行 "SSL extracting" 步骤时如果无法正确生成 4-cnhubert/5-wav32k 目录下的内容时,一般都是它引起的,可以根据实际情况来调整为True或者False。 + +2. Volume设置,容器内的应用根目录设置为 /workspace。 默认的 docker-compose.yaml 中列出了一些实际的例子,便于上传/下载内容。 +3. shm_size:Windows下的Docker Desktop默认可用内存过小,会导致运行异常,根据自己情况酌情设置。 +4. deploy小节下的gpu相关内容,请根据您的系统和实际情况酌情设置。 + + + +#### 通过 docker compose运行 +``` +docker compose -f "docker-compose.yaml" up -d +``` + +#### 通过 docker 命令运行 + +同上,根据您自己的实际情况修改对应的参数,然后运行如下命令: +``` +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03 +``` + + ### 预训练模型 diff --git a/docs/ja/README.md b/docs/ja/README.md index d0987a82..cadb68b1 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -93,6 +93,30 @@ brew install ffmpeg [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートディレクトリに置きます。 +### Dockerの使用 + +#### docker-compose.yamlの設定 + +1. 環境変数: + - `is_half`:半精度/倍精度の制御。"SSL抽出"ステップ中に`4-cnhubert/5-wav32k`ディレクトリ内の内容が正しく生成されない場合、通常これが原因です。実際の状況に応じてTrueまたはFalseに調整してください。 + +2. ボリューム設定:コンテナ内のアプリケーションのルートディレクトリは`/workspace`に設定されます。デフォルトの`docker-compose.yaml`には、アップロード/ダウンロードの内容の実例がいくつか記載されています。 +3. `shm_size`:WindowsのDocker Desktopのデフォルトの利用可能メモリが小さすぎるため、異常な動作を引き起こす可能性があります。状況に応じて適宜設定してください。 +4. `deploy`セクションのGPUに関連する内容は、システムと実際の状況に応じて慎重に設定してください。 + +#### docker composeで実行する +```markdown +docker compose -f "docker-compose.yaml" up -d +``` + +#### dockerコマンドで実行する + +上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します: +```markdown +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:dev-20240123.03 +``` + + ### 事前訓練済みモデル From d1ec88193f592ecc50d6ded7508f37ba258fc5c8 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 18:41:05 +0800 Subject: [PATCH 06/16] Update config.py --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 75db9bc8..8b5f378f 100644 --- a/config.py +++ b/config.py @@ -4,7 +4,7 @@ import sys,os # 推理用的指定模型 sovits_path = "" gpt_path = "" -is_half = eval(os.environ.get("is_half",True)) +is_half = eval(os.environ.get("is_half","True")) is_share=False cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base" From 396043a2ed852288fc736f9fe72c737c014e24ef Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 18:43:15 +0800 Subject: [PATCH 07/16] Add files via upload --- GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index bf3ab49e..71b48a96 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -66,8 +66,9 @@ def name2go(wav_name): print("%s-filtered" % (wav_name, tmp_max)) return tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha*32768)) + ((1 - alpha)*32768) * tmp_audio + tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha*1145.14)) + ((1 - alpha)*1145.14) * tmp_audio tmp_audio = librosa.resample( - tmp_audio32, orig_sr=32000, target_sr=16000 + tmp_audio32b, orig_sr=32000, target_sr=16000 )#不是重采样问题 tensor_wav16 = torch.from_numpy(tmp_audio) if (is_half == True): From da19013b06f856213e9404371a3cff0a142e9090 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 18:52:03 +0800 Subject: [PATCH 08/16] Add files via upload --- GPT_SoVITS/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPT_SoVITS/utils.py b/GPT_SoVITS/utils.py index e1a66ea1..0ce03b33 100644 --- a/GPT_SoVITS/utils.py +++ b/GPT_SoVITS/utils.py @@ -18,7 +18,7 @@ logging.getLogger("matplotlib").setLevel(logging.ERROR) MATPLOTLIB_FLAG = False -logging.basicConfig(stream=sys.stdout, level=logging.WARNING) +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) logger = logging @@ -310,13 +310,13 @@ def check_git_hash(model_dir): def get_logger(model_dir, filename="train.log"): global logger logger = logging.getLogger(os.path.basename(model_dir)) - logger.setLevel(logging.WARNING) + logger.setLevel(logging.DEBUG) formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s") if not os.path.exists(model_dir): os.makedirs(model_dir) h = logging.FileHandler(os.path.join(model_dir, filename)) - h.setLevel(logging.WARNING) + h.setLevel(logging.DEBUG) h.setFormatter(formatter) logger.addHandler(h) return logger From 2e834b305fa33c44416794b7f55e95456684ce4d Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 18:56:08 +0800 Subject: [PATCH 09/16] Add files via upload --- tools/subfix_webui.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/subfix_webui.py b/tools/subfix_webui.py index ad4907b0..d6624d03 100644 --- a/tools/subfix_webui.py +++ b/tools/subfix_webui.py @@ -110,6 +110,7 @@ def b_submit_change(*text_list): def b_delete_audio(*checkbox_list): global g_data_json, g_index, g_max_json_index + b_save_file() change = False for i, checkbox in reversed(list(enumerate(checkbox_list))): if g_index + i < len(g_data_json): @@ -121,8 +122,8 @@ def b_delete_audio(*checkbox_list): if g_index > g_max_json_index: g_index = g_max_json_index g_index = g_index if g_index >= 0 else 0 - # if change: - # b_save_file() + if change: + b_save_file() # return gr.Slider(value=g_index, maximum=(g_max_json_index if g_max_json_index>=0 else 0)), *b_change_index(g_index, g_batch) return {"value":g_index,"__type__":"update","maximum":(g_max_json_index if g_max_json_index>=0 else 0)},*b_change_index(g_index, g_batch) @@ -172,6 +173,7 @@ def b_audio_split(audio_breakpoint, *checkbox_list): def b_merge_audio(interval_r, *checkbox_list): global g_data_json , g_max_json_index + b_save_file() checked_index = [] audios_path = [] audios_text = [] From 73cf11e04dcb94fba47376cd127b6bccaab28002 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:19:37 +0800 Subject: [PATCH 10/16] Update chinese.py --- GPT_SoVITS/text/chinese.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py index 64c8818f..de3ef011 100644 --- a/GPT_SoVITS/text/chinese.py +++ b/GPT_SoVITS/text/chinese.py @@ -18,7 +18,7 @@ pinyin_to_symbol_map = { for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines() } -import jieba.posseg as psg +import jieba_fast.posseg as psg rep_map = { From 80fffb0ad46e4e7f27948d5a57c88cf342088d50 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:19:46 +0800 Subject: [PATCH 11/16] Update tone_sandhi.py --- GPT_SoVITS/text/tone_sandhi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/text/tone_sandhi.py b/GPT_SoVITS/text/tone_sandhi.py index f987a3f4..eafb179e 100644 --- a/GPT_SoVITS/text/tone_sandhi.py +++ b/GPT_SoVITS/text/tone_sandhi.py @@ -14,7 +14,7 @@ from typing import List from typing import Tuple -import jieba +import jieba_fast as jieba from pypinyin import lazy_pinyin from pypinyin import Style From 7a32d77782f26ab8f284643f050ce11c65344dfb Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:20:26 +0800 Subject: [PATCH 12/16] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index fedce8ab..a8e72ea9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,4 @@ transformers chardet PyYAML psutil -jieba +jieba_fast From 63cfc839834f73e89f21c2c81b7c6fd949c25f3d Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:20:39 +0800 Subject: [PATCH 13/16] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 59089eab..0e80304d 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ sudo apt-get install python3.9-distutils #### Pip Packages ```bash -pip install torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba +pip install torch numpy scipy tensorboard librosa==0.9.2 numba==0.56.4 pytorch-lightning gradio==3.14.0 ffmpeg-python onnxruntime tqdm cn2an pypinyin pyopenjtalk g2p_en chardet transformers jieba_fast ``` #### Additional Requirements From 63625758a99e645f3218dd167924e01a0e3cf0dc Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:57:47 +0800 Subject: [PATCH 14/16] Add files via upload --- webui.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/webui.py b/webui.py index 15202267..4461056b 100644 --- a/webui.py +++ b/webui.py @@ -1,4 +1,4 @@ -import os,shutil,sys,pdb +import os,shutil,sys,pdb,re now_dir = os.getcwd() sys.path.append(now_dir) import json,yaml,warnings,torch @@ -85,9 +85,16 @@ os.makedirs(SoVITS_weight_root,exist_ok=True) os.makedirs(GPT_weight_root,exist_ok=True) SoVITS_names,GPT_names = get_weights_names() +def custom_sort_key(s): + # 使用正则表达式提取字符串中的数字部分和非数字部分 + parts = re.split('(\d+)', s) + # 将数字部分转换为整数,非数字部分保持不变 + parts = [int(part) if part.isdigit() else part for part in parts] + return parts + def change_choices(): SoVITS_names, GPT_names = get_weights_names() - return {"choices": sorted(SoVITS_names), "__type__": "update"}, {"choices": sorted(GPT_names), "__type__": "update"} + return {"choices": sorted(SoVITS_names,key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names,key=custom_sort_key), "__type__": "update"} p_label=None p_uvr5=None @@ -733,8 +740,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: with gr.TabItem(i18n("1C-推理")): gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。")) with gr.Row(): - GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names),value=pretrained_gpt_name) - SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names),value=pretrained_sovits_name) + GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name,interactive=True) + SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name,interactive=True) gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True) refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") refresh_button.click(fn=change_choices,inputs=[],outputs=[SoVITS_dropdown,GPT_dropdown]) From 0c691191e894c15686e88279745712b3c6dc232f Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:57:51 +0800 Subject: [PATCH 15/16] Add files via upload --- GPT_SoVITS/inference_webui.py | 110 ++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 39 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 246748ae..3046d7af 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -1,4 +1,5 @@ -import os +import os,re +import pdb gpt_path = os.environ.get( "gpt_path", "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" @@ -42,8 +43,6 @@ if is_half == True: else: bert_model = bert_model.to(device) - -# bert_model=bert_model.to(device) def get_bert_feature(text, word2ph): with torch.no_grad(): inputs = tokenizer(text, return_tensors="pt") @@ -57,15 +56,8 @@ def get_bert_feature(text, word2ph): repeat_feature = res[i].repeat(word2ph[i], 1) phone_level_feature.append(repeat_feature) phone_level_feature = torch.cat(phone_level_feature, dim=0) - # if(is_half==True):phone_level_feature=phone_level_feature.half() return phone_level_feature.T - -n_semantic = 1024 - -dict_s2=torch.load(sovits_path,map_location="cpu") -hps=dict_s2["config"] - class DictToAttrRecursive(dict): def __init__(self, input_dict): super().__init__(input_dict) @@ -94,40 +86,48 @@ class DictToAttrRecursive(dict): raise AttributeError(f"Attribute {item} not found") -hps = DictToAttrRecursive(hps) - -hps.model.semantic_frame_rate = "25hz" -dict_s1 = torch.load(gpt_path, map_location="cpu") -config = dict_s1["config"] ssl_model = cnhubert.get_model() if is_half == True: ssl_model = ssl_model.half().to(device) else: ssl_model = ssl_model.to(device) -vq_model = SynthesizerTrn( - hps.data.filter_length // 2 + 1, - hps.train.segment_size // hps.data.hop_length, - n_speakers=hps.data.n_speakers, - **hps.model -) -if is_half == True: - vq_model = vq_model.half().to(device) -else: - vq_model = vq_model.to(device) -vq_model.eval() -print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) -hz = 50 -max_sec = config["data"]["max_sec"] -t2s_model = Text2SemanticLightningModule(config, "ojbk", is_train=False) -t2s_model.load_state_dict(dict_s1["weight"]) -if is_half == True: - t2s_model = t2s_model.half() -t2s_model = t2s_model.to(device) -t2s_model.eval() -total = sum([param.nelement() for param in t2s_model.parameters()]) -print("Number of parameter: %.2fM" % (total / 1e6)) +def change_sovits_weights(sovits_path): + global vq_model,hps + dict_s2=torch.load(sovits_path,map_location="cpu") + hps=dict_s2["config"] + hps = DictToAttrRecursive(hps) + hps.model.semantic_frame_rate = "25hz" + vq_model = SynthesizerTrn( + hps.data.filter_length // 2 + 1, + hps.train.segment_size // hps.data.hop_length, + n_speakers=hps.data.n_speakers, + **hps.model + ) + del vq_model.enc_q + if is_half == True: + vq_model = vq_model.half().to(device) + else: + vq_model = vq_model.to(device) + vq_model.eval() + print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) +change_sovits_weights(sovits_path) +def change_gpt_weights(gpt_path): + global hz,max_sec,t2s_model,config + hz = 50 + dict_s1 = torch.load(gpt_path, map_location="cpu") + config = dict_s1["config"] + max_sec = config["data"]["max_sec"] + t2s_model = Text2SemanticLightningModule(config, "****", is_train=False) + t2s_model.load_state_dict(dict_s1["weight"]) + if is_half == True: + t2s_model = t2s_model.half() + t2s_model = t2s_model.to(device) + t2s_model.eval() + total = sum([param.nelement() for param in t2s_model.parameters()]) + print("Number of parameter: %.2fM" % (total / 1e6)) +change_gpt_weights(gpt_path) def get_spepc(hps, filename): audio = load_audio(filename, int(hps.data.sampling_rate)) @@ -325,14 +325,46 @@ def cut3(inp): inp = inp.strip("\n") return "\n".join(["%s。" % item for item in inp.strip("。").split("。")]) +def custom_sort_key(s): + # 使用正则表达式提取字符串中的数字部分和非数字部分 + parts = re.split('(\d+)', s) + # 将数字部分转换为整数,非数字部分保持不变 + parts = [int(part) if part.isdigit() else part for part in parts] + return parts + +def change_choices(): + SoVITS_names, GPT_names = get_weights_names() + return {"choices": sorted(SoVITS_names,key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names,key=custom_sort_key), "__type__": "update"} + +pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth" +pretrained_gpt_name="GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" +SoVITS_weight_root="SoVITS_weights" +GPT_weight_root="GPT_weights" +os.makedirs(SoVITS_weight_root,exist_ok=True) +os.makedirs(GPT_weight_root,exist_ok=True) +def get_weights_names(): + SoVITS_names = [pretrained_sovits_name] + for name in os.listdir(SoVITS_weight_root): + if name.endswith(".pth"):SoVITS_names.append("%s/%s"%(SoVITS_weight_root,name)) + GPT_names = [pretrained_gpt_name] + for name in os.listdir(GPT_weight_root): + if name.endswith(".ckpt"): GPT_names.append("%s/%s"%(GPT_weight_root,name)) + return SoVITS_names,GPT_names +SoVITS_names,GPT_names = get_weights_names() with gr.Blocks(title="GPT-SoVITS WebUI") as app: gr.Markdown( value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") ) - # with gr.Tabs(): - # with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): with gr.Group(): + gr.Markdown(value=i18n("模型切换")) + with gr.Row(): + GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path,interactive=True) + SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path,interactive=True) + refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") + refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown]) + SoVITS_dropdown.change(change_sovits_weights,[SoVITS_dropdown],[]) + GPT_dropdown.change(change_gpt_weights,[GPT_dropdown],[]) gr.Markdown(value=i18n("*请上传并填写参考信息")) with gr.Row(): inp_ref = gr.Audio(label=i18n("请上传参考音频"), type="filepath") From 69f588dfd8abc5b41df82aaef5c4a02b86b59795 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 23 Jan 2024 21:00:40 +0800 Subject: [PATCH 16/16] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index bbd51f8c..93fc8be7 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -21,5 +21,11 @@ 3-音频路径检查。如果尝试读取输入错的路径报错路径不存在,而非ffmpeg错误。 ### 20240123更新 -1-hubert提取在half下出现nan概率更高的问题 +1-解决hubert提取nan导致SoVITS/GPT训练报错ZeroDivisionError的问题 + +2-支持推理界面快速切换模型 + +3-优化模型文件排序逻辑 + +4-中文分词使用jieba_fast代替jieba