Update inference_webui.py

2026-06-05 05:48:14 +08:00 · 2024-01-30 14:56:47 +08:00 · 2024-01-30 14:56:47 +08:00 · cd29e98732
commit cd29e98732
parent 780e43d880
1 changed file with 108 additions and 61 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -1,4 +1,5 @@
 import os, re, logging
 logging.getLogger("markdown_it").setLevel(logging.ERROR)
 logging.getLogger("urllib3").setLevel(logging.ERROR)
 logging.getLogger("httpcore").setLevel(logging.ERROR)
@ -46,6 +47,7 @@ from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
 import librosa, torch
 from feature_extractor import cnhubert
 cnhubert.cnhubert_base_path = cnhubert_base_path
 from module.models import SynthesizerTrn
@ -56,6 +58,7 @@ from time import time as ttime
 from module.mel_processing import spectrogram_torch
 from my_utils import load_audio
 from tools.i18n.i18n import I18nAuto
 i18n = I18nAuto()
 os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
@ -74,6 +77,7 @@ if is_half == True:
 else:
    bert_model = bert_model.to(device)
 def get_bert_feature(text, word2ph):
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
@ -89,6 +93,7 @@ def get_bert_feature(text, word2ph):
    phone_level_feature = torch.cat(phone_level_feature, dim=0)
    return phone_level_feature.T
 class DictToAttrRecursive(dict):
    def __init__(self, input_dict):
        super().__init__(input_dict)
@ -123,6 +128,7 @@ if is_half == True:
 else:
    ssl_model = ssl_model.to(device)
 def change_sovits_weights(sovits_path):
    global vq_model, hps
    dict_s2 = torch.load(sovits_path, map_location="cpu")
@ -143,9 +149,13 @@ def change_sovits_weights(sovits_path):
        vq_model = vq_model.to(device)
    vq_model.eval()
    print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
-    with open("./sweight.txt","w",encoding="utf-8")as f:f.write(sovits_path)
+    with open("./sweight.txt", "w", encoding="utf-8") as f:
        f.write(sovits_path)
 change_sovits_weights(sovits_path)
 def change_gpt_weights(gpt_path):
    global hz, max_sec, t2s_model, config
    hz = 50
@ -161,8 +171,11 @@ def change_gpt_weights(gpt_path):
    total = sum([param.nelement() for param in t2s_model.parameters()])
    print("Number of parameter: %.2fM" % (total / 1e6))
    with open("./gweight.txt", "w", encoding="utf-8") as f: f.write(gpt_path)
 change_gpt_weights(gpt_path)
 def get_spepc(hps, filename):
    audio = load_audio(filename, int(hps.data.sampling_rate))
    audio = torch.FloatTensor(audio)
@ -262,12 +275,16 @@ def nonen_get_bert_inf(text, language):
    return bert
 splits = {"，", "。", "？", "！", ",", ".", "?", "!", "~", ":", "：", "—", "…", }
 def get_first(text):
    """Return the stripped part of ``text`` that precedes the first separator.

    Separators are the characters in the module-level ``splits`` set. When
    ``text`` contains none of them, the whole stripped text is returned.
    """
    escaped_marks = [re.escape(mark) for mark in splits]
    char_class = "[%s]" % "".join(escaped_marks)
    leading_piece = re.split(char_class, text)[0]
    return leading_piece.strip()
 def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切")):
    t0 = ttime()
    prompt_text = prompt_text.strip("\n")
@ -307,10 +324,16 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
        phones1, word2ph1, norm_text1 = clean_text_inf(prompt_text, prompt_language)
    else:
        phones1, word2ph1, norm_text1 = nonen_clean_text_inf(prompt_text, prompt_language)
-    if(how_to_cut==i18n("凑四句一切")):text=cut1(text)
+    if (how_to_cut == i18n("凑四句一切")):
-    elif(how_to_cut==i18n("凑50字一切")):text=cut2(text)
+        text = cut1(text)
-    elif(how_to_cut==i18n("按中文句号。切")):text=cut3(text)
+    elif (how_to_cut == i18n("凑50字一切")):
-    elif(how_to_cut==i18n("按英文句号.切")):text=cut4(text)
+        text = cut2(text)
    elif (how_to_cut == i18n("按中文句号。切")):
        text = cut3(text)
    elif (how_to_cut == i18n("按英文句号.切")):
        text = cut4(text)
    elif (how_to_cut == i18n("按标点符号切")):
        text = cut5(text)
    text = text.replace("\n\n", "\n").replace("\n\n", "\n").replace("\n\n", "\n")
    print(i18n("实际输入的目标文本(切句后):"), text)
    texts = text.split("\n")
@ -380,6 +403,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
        np.int16
    )
 def split(todo_text):
    todo_text = todo_text.replace("……", "。").replace("——", "，")
    if todo_text[-1] not in splits:
@ -440,10 +464,25 @@ def cut2(inp):
 def cut3(inp):
    """Split text on the Chinese full stop and put each piece on its own line.

    Surrounding newlines and leading/trailing full stops are removed first.
    The full stops themselves do not appear in the result.
    """
    trimmed = inp.strip("\n").strip("。")
    segments = trimmed.split("。")
    return "\n".join(segments)
 def cut4(inp):
    """Split text on the ASCII period and put each piece on its own line.

    Surrounding newlines and leading/trailing periods are removed first.
    The periods themselves do not appear in the result.
    """
    trimmed = inp.strip("\n").strip(".")
    segments = trimmed.split(".")
    return "\n".join(segments)
 # contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py
 def cut5(inp):
    """Split text after every punctuation mark, one segment per line.

    Each segment keeps the punctuation mark that terminates it. Text that
    follows the last punctuation mark (or the whole input, when it contains
    no punctuation at all) is kept as a final segment rather than dropped.

    Args:
        inp: The text to split. Leading and trailing newlines are removed.

    Returns:
        The segments joined with newline characters.
    """
    inp = inp.strip("\n")
    punds = r'[,.;?!、，。？！;：]'
    # The capture group makes re.split return text and marks alternately:
    # [text, mark, text, mark, ..., tail], so the list length is always odd.
    items = re.split(f'({punds})', inp)
    merged = ["".join(group) for group in zip(items[::2], items[1::2])]
    # zip() pairs each text with its mark and therefore skips the unpaired
    # tail. Keep the tail when it is non-empty so unterminated text survives;
    # an empty tail (input ended with punctuation) adds nothing.
    tail = items[-1]
    if tail:
        merged.append(tail)
    return "\n".join(merged)
 def custom_sort_key(s):
    # 使用正则表达式提取字符串中的数字部分和非数字部分
    parts = re.split('(\d+)', s)
@ -451,16 +490,20 @@ def custom_sort_key(s):
    parts = [int(part) if part.isdigit() else part for part in parts]
    return parts
 def change_choices():
    """Re-read the available weight names and return them as update dicts.

    Returns a pair of dicts (SoVITS first, GPT second). Each holds the names
    sorted with ``custom_sort_key`` under "choices" and the marker
    "__type__": "update".
    """
    sovits_found, gpt_found = get_weights_names()
    sovits_update = {
        "choices": sorted(sovits_found, key=custom_sort_key),
        "__type__": "update",
    }
    gpt_update = {
        "choices": sorted(gpt_found, key=custom_sort_key),
        "__type__": "update",
    }
    return sovits_update, gpt_update
 # Paths of the bundled pretrained SoVITS and GPT weights; the SoVITS one is
 # always listed first by get_weights_names().
 pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth"
 pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
 # Directories that get_weights_names() scans for additional weight files.
 SoVITS_weight_root = "SoVITS_weights"
 GPT_weight_root = "GPT_weights"
 # Create both directories up front so the later os.listdir() calls do not
 # fail when they are missing.
 os.makedirs(SoVITS_weight_root, exist_ok=True)
 os.makedirs(GPT_weight_root, exist_ok=True)
 def get_weights_names():
    SoVITS_names = [pretrained_sovits_name]
    for name in os.listdir(SoVITS_weight_root):
@ -469,6 +512,8 @@ def get_weights_names():
    for name in os.listdir(GPT_weight_root):
        if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (GPT_weight_root, name))
    return SoVITS_names, GPT_names
 SoVITS_names, GPT_names = get_weights_names()
 with gr.Blocks(title="GPT-SoVITS WebUI") as app:
@ -499,8 +544,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
            )
            how_to_cut = gr.Radio(
                label=i18n("怎么切"),
-                choices=[i18n("不切"),i18n("凑四句一切"),i18n("凑50字一切"),i18n("按中文句号。切"),i18n("按英文句号.切"),],
+                choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
-                value=i18n("凑50字一切"),
+                value=i18n("凑四句一切"),
                interactive=True,
            )
            inference_button = gr.Button(i18n("合成语音"), variant="primary")
@ -519,11 +564,13 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
            button2 = gr.Button(i18n("凑50字一切"), variant="primary")
            button3 = gr.Button(i18n("按中文句号。切"), variant="primary")
            button4 = gr.Button(i18n("按英文句号.切"), variant="primary")
            button5 = gr.Button(i18n("按标点符号切"), variant="primary")
            text_opt = gr.Textbox(label=i18n("切分后文本"), value="")
            button1.click(cut1, [text_inp], [text_opt])
            button2.click(cut2, [text_inp], [text_opt])
            button3.click(cut3, [text_inp], [text_opt])
            button4.click(cut4, [text_inp], [text_opt])
            button5.click(cut5, [text_inp], [text_opt])
        gr.Markdown(value=i18n("后续将支持混合语种编码文本输入。"))
 app.queue(concurrency_count=511, max_size=1022).launch(