Merge branch 'RVC-Boss:main' into main

2026-06-05 13:58:18 +08:00 · 2024-03-06 16:12:51 +00:00 · 2024-03-06 16:12:51 +00:00 · 9be39a8739
commit 9be39a8739
parent 07c620c17e 3905f6f2fe
4 changed files with 40 additions and 8 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -258,11 +258,15 @@ def get_phones_and_bert(text,language):
    elif language in {"zh", "ja","auto"}:
        textlist=[]
        langlist=[]
-        LangSegment.setfilters(["zh","ja","en"])
+        LangSegment.setfilters(["zh","ja","en","ko"])
        if language == "auto":
            for tmp in LangSegment.getTexts(text):
-                langlist.append(tmp["lang"])
-                textlist.append(tmp["text"])
+                if tmp["lang"] == "ko":
+                    langlist.append("zh")
+                    textlist.append(tmp["text"])
+                else:
+                    langlist.append(tmp["lang"])
+                    textlist.append(tmp["text"])
        else:
            for tmp in LangSegment.getTexts(text):
                if tmp["lang"] == "en":
@ -580,7 +584,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
                interactive=True,
            )
            with gr.Row():
-                gr.Markdown("gpt采样参数(无参考文本时不要太低)：")
+                gr.Markdown(value=i18n("gpt采样参数(无参考文本时不要太低)："))
                top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
                top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
                temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
--- a/docs/cn/Changelog_CN.md
+++ b/docs/cn/Changelog_CN.md
@ -127,7 +127,7 @@

 ### 20240221更新

-1-数据处理添加语音降噪选项
+1-数据处理添加语音降噪选项（降噪为只剩16k采样率，除非底噪很大先不急着用哦。）

 2-中文日文前端处理优化 https://github.com/RVC-Boss/GPT-SoVITS/pull/559 https://github.com/RVC-Boss/GPT-SoVITS/pull/556 https://github.com/RVC-Boss/GPT-SoVITS/pull/532 https://github.com/RVC-Boss/GPT-SoVITS/pull/507 https://github.com/RVC-Boss/GPT-SoVITS/pull/509

@ -135,9 +135,22 @@

 4-colab修复不开启公网url

+### 20240306更新
+
+1-推理加速50%（RTX3090+pytorch2.2.1+cu11.8+win10+py39 tested）https://github.com/RVC-Boss/GPT-SoVITS/pull/672
+
+2-如果用faster whisper非中文ASR不再需要先下中文funasr模型
+
+3-修复uvr5去混响模型 是否混响 反的 https://github.com/RVC-Boss/GPT-SoVITS/pull/610
+
+4-faster whisper如果无cuda可用自动cpu推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/675
+
+5-修改is_half的判断使在Mac上能正常CPU推理 https://github.com/RVC-Boss/GPT-SoVITS/pull/573
+
+
 todolist：

-1-中文多音字推理优化
+1-中文多音字推理优化(有没有人来测试的，欢迎把测试结果写在pr评论区里) https://github.com/RVC-Boss/GPT-SoVITS/pull/488



--- a/i18n/locale/en_US.json
+++ b/i18n/locale/en_US.json
@ -2,6 +2,18 @@
  "很遗憾您这没有能用的显卡来支持您训练": "Unfortunately, there is no compatible GPU available to support your training.",
  "UVR5已开启": "UVR5 opened ",
  "UVR5已关闭": "UVR5 closed",
+  "输入文件夹路径": "Input folder path",
+  "输出文件夹路径": "Output folder path",
+  "ASR 模型": "ASR model",
+  "ASR 模型尺寸": "ASR model size",
+  "ASR 语言设置": "ASR language",
+  "模型切换": "Model switch",
+  "是否开启dpo训练选项(实验性)": "Enable DPO training (experimental feature)",
+  "开启无参考文本模式。不填参考文本亦相当于开启。": "Enable no-reference-text mode. Leaving 'Text for reference audio' empty also enables this mode.",
+  "使用无参考文本模式时建议使用微调的GPT": "A fine-tuned GPT model is recommended when using no-reference-text mode.",
+  "后续将支持转音素、手工修改音素、语音合成分步执行。": "Phoneme conversion, manual phoneme editing, and step-by-step speech synthesis will be supported in the future.",
+  "gpt采样参数(无参考文本时不要太低)：": "GPT sampling parameters (do not set too low when there is no reference text):",
+  "按标点符号切": "Slice by every punctuation mark",
  "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible. <br>If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory <b>Agreement-LICENSE</b> for details.",
  "0-前置数据集获取工具": "0-Fetch dataset",
  "0a-UVR5人声伴奏分离&去混响去延迟工具": "0a-UVR5 webui (for vocal separation, deecho, dereverb and denoise)",
--- a/tools/asr/fasterwhisper_asr.py
+++ b/tools/asr/fasterwhisper_asr.py
@ -4,12 +4,12 @@ os.environ["HF_ENDPOINT"]="https://hf-mirror.com"
 import traceback
 import requests
 from glob import glob
+import torch

 from faster_whisper import WhisperModel
 from tqdm import tqdm

 from tools.asr.config import check_fw_local_models
-from tools.asr.funasr_asr import only_asr

 os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

@ -45,8 +45,9 @@ def execute_asr(input_folder, output_folder, model_size, language,precision):
    if language == 'auto':
        language = None #不设置语种由模型自动输出概率最高的语种
    print("loading faster whisper model:",model_size,model_path)
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    try:
-        model = WhisperModel(model_path, device="cuda", compute_type=precision)
+        model = WhisperModel(model_path, device=device, compute_type=precision)
    except:
        return print(traceback.format_exc())
    output = []
@ -68,6 +69,8 @@ def execute_asr(input_folder, output_folder, model_size, language,precision):

            if info.language == "zh":
                print("检测为中文文本,转funasr处理")
+                if("only_asr"not in globals()):
+                    from tools.asr.funasr_asr import only_asr##如果用英文就不需要导入下载模型
                text = only_asr(file)

            if text == '':