From e066cd93d2351586aa9de99e62143db94f6fef4c Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Mon, 4 Mar 2024 00:13:28 +0800 Subject: [PATCH 1/7] fix auto LangSegment misunderstand KO --- GPT_SoVITS/inference_webui.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index d2f3f949..cbde9f8c 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -257,11 +257,15 @@ def get_phones_and_bert(text,language): elif language in {"zh", "ja","auto"}: textlist=[] langlist=[] - LangSegment.setfilters(["zh","ja","en"]) + LangSegment.setfilters(["zh","ja","en","ko"]) if language == "auto": for tmp in LangSegment.getTexts(text): - langlist.append(tmp["lang"]) - textlist.append(tmp["text"]) + if tmp["lang"] == "ko": + langlist.append("zh") + textlist.append(tmp["text"]) + else: + langlist.append(tmp["lang"]) + textlist.append(tmp["text"]) else: for tmp in LangSegment.getTexts(text): if tmp["lang"] == "en": From a2761038c05c72be94b31fb01f1cf05a0d4266fb Mon Sep 17 00:00:00 2001 From: DW <147780325+D3lik@users.noreply.github.com> Date: Mon, 4 Mar 2024 20:30:59 +1100 Subject: [PATCH 2/7] Update en_US.json --- i18n/locale/en_US.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/i18n/locale/en_US.json b/i18n/locale/en_US.json index 0a07679c..292a915c 100644 --- a/i18n/locale/en_US.json +++ b/i18n/locale/en_US.json @@ -2,6 +2,18 @@ "很遗憾您这没有能用的显卡来支持您训练": "Unfortunately, there is no compatible GPU available to support your training.", "UVR5已开启": "UVR5 opened ", "UVR5已关闭": "UVR5 closed", + "输入文件夹路径": "Input folder path", + "输出文件夹路径": "Output folder path", + "ASR 模型": "ASR model", + "ASR 模型尺寸": "ASR model size", + "ASR 语言设置": "ASR language", + "模型切换": "Model switch", + "是否开启dpo训练选项(实验性)": "Enable DPO training (experimental feature)", + "开启无参考文本模式。不填参考文本亦相当于开启。": "Enable no reference mode. 
If you don't fill 'Text for reference audio', no reference mode will be enabled.", + "使用无参考文本模式时建议使用微调的GPT": "Using a fine-tuned GPT model is recommended when no reference text is used.", + "后续将支持转音素、手工修改音素、语音合成分步执行。": " Step-by-step phoneme transformation and modification coming soon!", + "gpt采样参数(无参考文本时不要太低):": "GPT parameters:", + "按标点符号切": "Slice by every punct", "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.": "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE for details.", "0-前置数据集获取工具": "0-Fetch dataset", "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-UVR5 webui (for vocal separation, deecho, dereverb and denoise)", From 37206edbd967717cb0d95d88b8415a13d226908e Mon Sep 17 00:00:00 2001 From: DW <147780325+D3lik@users.noreply.github.com> Date: Mon, 4 Mar 2024 20:37:18 +1100 Subject: [PATCH 3/7] Update inference_webui.py --- GPT_SoVITS/inference_webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index df32d365..ee099627 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -584,7 +584,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: interactive=True, ) with gr.Row(): - gr.Markdown("gpt采样参数(无参考文本时不要太低):") + gr.Markdown(value=i18n("gpt采样参数(无参考文本时不要太低):")) top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True) top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True) temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True) From 93075f52ddc80c70e634534a2ad960d1f2b66e58 Mon Sep 17 00:00:00 2001 From: Yuze Wang Date: Tue, 5 Mar 2024 15:19:32 +0800 Subject: [PATCH 4/7] added the ability to automatically switch to cpu if fast whisper don't compile with cuda --- tools/asr/fasterwhisper_asr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index 5f49de70..9371324c 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -4,6 +4,7 @@ os.environ["HF_ENDPOINT"]="https://hf-mirror.com" import traceback import requests from glob import glob +import torch from faster_whisper import WhisperModel from tqdm import tqdm @@ -45,8 +46,9 @@ def 
execute_asr(input_folder, output_folder, model_size, language,precision): if language == 'auto': language = None #不设置语种由模型自动输出概率最高的语种 print("loading faster whisper model:",model_size,model_path) + device = 'cuda' if torch.cuda.is_available() else 'cpu' try: - model = WhisperModel(model_path, device="cuda", compute_type=precision) + model = WhisperModel(model_path, device=device, compute_type=precision) except: return print(traceback.format_exc()) output = [] From 616be20db3cf94f1cd663782fea61b2370704193 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:03:21 +0800 Subject: [PATCH 5/7] =?UTF-8?q?=E5=A6=82=E6=9E=9C=E7=94=A8=E8=8B=B1?= =?UTF-8?q?=E6=96=87ASR=E4=B8=8D=E5=86=8D=E9=9C=80=E8=A6=81=E5=85=88?= =?UTF-8?q?=E4=B8=8B=E4=B8=AD=E6=96=87funasr=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 如果用英文ASR不再需要先下中文funasr模型 --- tools/asr/fasterwhisper_asr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index 9371324c..f7b31aab 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -10,7 +10,6 @@ from faster_whisper import WhisperModel from tqdm import tqdm from tools.asr.config import check_fw_local_models -from tools.asr.funasr_asr import only_asr os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" @@ -70,6 +69,8 @@ def execute_asr(input_folder, output_folder, model_size, language,precision): if info.language == "zh": print("检测为中文文本,转funasr处理") + if("only_asr"not in globals()): + from tools.asr.funasr_asr import only_asr##如果用英文就不需要导入下载模型 text = only_asr(file) if text == '': From 34e35012f390f5371f2645f06b792b49fbf209be Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:27:29 +0800 Subject: [PATCH 6/7] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 17 +++++++++++++++-- 1 file 
changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 8afd3514..d0d07033 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -127,7 +127,7 @@ ### 20240221更新 -1-数据处理添加语音降噪选项 +1-数据处理添加语音降噪选项(降噪为只剩16k采样率,除非底噪很大先不急着用哦。) 2-中文日文前端处理优化 https://github.com/RVC-Boss/GPT-SoVITS/pull/559 https://github.com/RVC-Boss/GPT-SoVITS/pull/556 https://github.com/RVC-Boss/GPT-SoVITS/pull/532 https://github.com/RVC-Boss/GPT-SoVITS/pull/507 https://github.com/RVC-Boss/GPT-SoVITS/pull/509 @@ -135,9 +135,22 @@ 4-colab修复不开启公网url +### 20240306更新 + +1-推理加速50%(RTX3090+pytorch2.2.1+cu11.8tested)https://github.com/RVC-Boss/GPT-SoVITS/pull/672 + +2-如果用faster whisper非中文ASR不再需要先下中文funasr模型 + +3-修复uvr5去混响模型 是否混响 反的 https://github.com/RVC-Boss/GPT-SoVITS/pull/610 + +4-faster whisper如果无cuda可用自动cpu推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/675 + +5-修改is_half的判断使在Mac上能正常CPU推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/573 + + todolist: -1-中文多音字推理优化 +1-中文多音字推理优化(有没有人来测试的,欢迎把测试结果写在pr评论区里) https://github.com/RVC-Boss/GPT-SoVITS/pull/488 From 3905f6f2feb4e9ccf5ba9be8dac88a9e0518d412 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 6 Mar 2024 23:29:52 +0800 Subject: [PATCH 7/7] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index d0d07033..625e4782 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -137,7 +137,7 @@ ### 20240306更新 -1-推理加速50%(RTX3090+pytorch2.2.1+cu11.8tested)https://github.com/RVC-Boss/GPT-SoVITS/pull/672 +1-推理加速50%(RTX3090+pytorch2.2.1+cu11.8+win10+py39 tested)https://github.com/RVC-Boss/GPT-SoVITS/pull/672 2-如果用faster whisper非中文ASR不再需要先下中文funasr模型