mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
support timebre mixing
support timebre mixing
This commit is contained in:
parent
2b142405b8
commit
21f05ee471
@ -7,6 +7,8 @@
|
||||
全部按日文识别
|
||||
'''
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
||||
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
||||
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||
@ -14,6 +16,7 @@ logging.getLogger("httpx").setLevel(logging.ERROR)
|
||||
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
||||
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
||||
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
||||
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
||||
import LangSegment, os, re, sys, json
|
||||
import pdb
|
||||
import torch
|
||||
@ -257,6 +260,8 @@ change_gpt_weights(gpt_path)
|
||||
def get_spepc(hps, filename):
|
||||
audio = load_audio(filename, int(hps.data.sampling_rate))
|
||||
audio = torch.FloatTensor(audio)
|
||||
maxx=audio.abs().max()
|
||||
if(maxx>1):audio/=min(2,maxx)
|
||||
audio_norm = audio
|
||||
audio_norm = audio_norm.unsqueeze(0)
|
||||
spec = spectrogram_torch(
|
||||
@ -387,7 +392,7 @@ def merge_short_text_in_array(texts, threshold):
|
||||
##ref_wav_path+prompt_text+prompt_language+text(单个)+text_language+top_k+top_p+temperature
|
||||
# cache_tokens={}#暂未实现清理机制
|
||||
cache= {}
|
||||
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False,speed=1,if_freeze=False):
|
||||
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False,speed=1,if_freeze=False,inp_refs=123):
|
||||
global cache
|
||||
if ref_wav_path:pass
|
||||
else:gr.Warning(i18n('请上传参考音频'))
|
||||
@ -498,12 +503,16 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
|
||||
pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)
|
||||
cache[i_text]=pred_semantic
|
||||
t3 = ttime()
|
||||
refer = get_spepc(hps, ref_wav_path) # .to(device)
|
||||
if is_half == True:
|
||||
refer = refer.half().to(device)
|
||||
else:
|
||||
refer = refer.to(device)
|
||||
audio = (vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refer,speed=speed).detach().cpu().numpy()[0, 0])
|
||||
refers=[]
|
||||
if(inp_refs):
|
||||
for path in inp_refs:
|
||||
try:
|
||||
refer = get_spepc(hps, path.name).to(dtype).to(device)
|
||||
refers.append(refer)
|
||||
except:
|
||||
traceback.print_exc()
|
||||
if(len(refers)==0):refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)]
|
||||
audio = (vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers,speed=speed).detach().cpu().numpy()[0, 0])
|
||||
max_audio=np.abs(audio).max()#简单防止16bit爆音
|
||||
if max_audio>1:audio/=max_audio
|
||||
audio_opt.append(audio)
|
||||
@ -692,13 +701,14 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
prompt_language = gr.Dropdown(
|
||||
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=14
|
||||
)
|
||||
inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="file_count",scale=13)
|
||||
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
|
||||
with gr.Row():
|
||||
with gr.Column(scale=13):
|
||||
text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=26, max_lines=26)
|
||||
with gr.Column(scale=7):
|
||||
text_language = gr.Dropdown(
|
||||
label=i18n("需要合成的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=1
|
||||
label=i18n("需要合成的语种")+i18n(".限制范围越小判别效果越好。"), choices=list(dict_language.keys()), value=i18n("中文"), scale=1
|
||||
)
|
||||
how_to_cut = gr.Dropdown(
|
||||
label=i18n("怎么切"),
|
||||
@ -707,10 +717,10 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
interactive=True, scale=1
|
||||
)
|
||||
gr.Markdown(value=html_center(i18n("语速调整,高为更快")))
|
||||
if_freeze=gr.Checkbox(label=i18n("是否直接对上次合成结果调整语速。防止随机性。"), value=False, interactive=True,show_label=True, scale=1)
|
||||
if_freeze=gr.Checkbox(label=i18n("是否直接对上次合成结果调整语速和音色。防止随机性。"), value=False, interactive=True,show_label=True, scale=1)
|
||||
speed = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label=i18n("语速"),value=1,interactive=True, scale=1)
|
||||
gr.Markdown(html_center(i18n("GPT采样参数(无参考文本时不要太低。不懂就用默认):")))
|
||||
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=10,interactive=True, scale=1)
|
||||
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=15,interactive=True, scale=1)
|
||||
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True, scale=1)
|
||||
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True, scale=1)
|
||||
# with gr.Column():
|
||||
@ -723,7 +733,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
|
||||
inference_button.click(
|
||||
get_tts_wav,
|
||||
[inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed,if_freeze],
|
||||
[inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed,if_freeze,inp_refs],
|
||||
[output],
|
||||
)
|
||||
SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
|
||||
|
Loading…
x
Reference in New Issue
Block a user