Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git, synced 2025-10-07 23:48:48 +08:00

modified: .gitignore
modified: GPT_SoVITS/AR/models/t2s_model.py
modified: GPT_SoVITS/AR/models/utils.py
modified: GPT_SoVITS/inference_webui.py

This commit is contained in:
parent 0357d0c7a4
commit b9a9c88a8c
.gitignore (vendored)

@@ -9,4 +9,6 @@ logs
 reference
 GPT_weights
 SoVITS_weights
 TEMP
+gweight.txt
+sweight.txt
GPT_SoVITS/AR/models/t2s_model.py

@@ -329,6 +329,7 @@ class Text2SemanticDecoder(nn.Module):
         bert_feature,
         top_k: int = -100,
         top_p: int = 100,
+        min_p: int = 80,
         early_stop_num: int = -1,
         temperature: float = 1.0,
     ):
@@ -397,7 +398,7 @@ class Text2SemanticDecoder(nn.Module):
             if(idx==0):  ### the first step must not emit EOS, otherwise nothing is left
                 logits = logits[:, :-1]  ### drop the probability of the 1024 end-of-sequence token
             samples = sample(
-                logits[0], y, top_k=top_k, top_p=top_p, repetition_penalty=1.35, temperature=temperature
+                logits[0], y, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=1.35, temperature=temperature
             )[0].unsqueeze(0)
             # the semantic_ids generated this step and the previous y together form the new y
             # print(samples.shape)  # [1,1]; the first 1 is the batch size
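The new min_p keyword rides along the same path as top_k and top_p: infer_panel accepts it and forwards it untouched to sample(), whose keyword arguments end up in logits_to_probs in GPT_SoVITS/AR/models/utils.py (next file). A rough, hypothetical sketch of that pass-through, with heavily simplified signatures rather than the repository's actual ones:

    import torch

    def logits_to_probs_sketch(logits, previous_tokens=None, temperature=1.0,
                               top_k=None, min_p=None, top_p=None, repetition_penalty=1.0):
        # stand-in for the real filtering; here only temperature is applied
        return torch.softmax(logits / max(temperature, 1e-5), dim=-1)

    def sample_sketch(logits, previous_tokens=None, **sampling_kwargs):
        # the sampling knobs (top_k, top_p, min_p, temperature, ...) are forwarded unchanged
        probs = logits_to_probs_sketch(logits, previous_tokens=previous_tokens, **sampling_kwargs)
        return torch.multinomial(probs, num_samples=1)

    # decoder-loop call shape, mirroring the hunk above
    next_token = sample_sketch(torch.randn(1024), top_k=20, top_p=1.0, min_p=0.8, temperature=0.6)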
GPT_SoVITS/AR/models/utils.py

@@ -112,6 +112,7 @@ def logits_to_probs(
     previous_tokens: Optional[torch.Tensor] = None,
     temperature: float = 1.0,
     top_k: Optional[int] = None,
+    min_p: Optional[float] = None,
     top_p: Optional[int] = None,
     repetition_penalty: float = 1.0,
 ):
@@ -127,6 +128,12 @@ def logits_to_probs(
         )
         logits.scatter_(dim=0, index=previous_tokens, src=score)

+    # Min-p sampling, we sample tokens with a probability above the min_p * max_prob where max_prob is the maximum probability in the distribution
+    if min_p is not None and min_p < 1.0 and min_p > 0.0:
+        max_prob = torch.max(logits)
+        logits = torch.where(logits < min_p * max_prob, -float("Inf"), logits)
+
+    # Top-p sampling, we sample from the smallest set of tokens whose cumulative probability mass exceeds the threshold `p`
     if top_p is not None and top_p < 1.0:
         sorted_logits, sorted_indices = torch.sort(logits, descending=True)
         cum_probs = torch.cumsum(
@@ -139,8 +146,10 @@ def logits_to_probs(
         )
         logits = logits.masked_fill(indices_to_remove, -float("Inf"))

+    # Use temperature to smooth the distribution
     logits = logits / max(temperature, 1e-5)

+    # Top-k sampling, we sample only from the top k most likely tokens
     if top_k is not None:
         v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
         pivot = v.select(-1, -1).unsqueeze(-1)
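For reference, min-p keeps only the tokens whose probability is at least min_p times the probability of the single most likely token, so the cutoff adapts to how peaked the distribution is. Below is a minimal standalone sketch of that filter applied to a probability distribution; it is not the repository's code (the hunk above applies the same threshold test directly to the logits tensor, before softmax, and only when 0 < min_p < 1):

    import torch

    def min_p_filter(logits: torch.Tensor, min_p: float) -> torch.Tensor:
        """Illustrative min-p filtering: mask tokens below min_p * max_prob, then renormalise."""
        probs = torch.softmax(logits, dim=-1)
        max_prob = probs.max(dim=-1, keepdim=True).values
        keep = probs >= min_p * max_prob              # threshold scales with the peak probability
        filtered = torch.where(keep, probs, torch.zeros_like(probs))
        return filtered / filtered.sum(dim=-1, keepdim=True)

    logits = torch.tensor([2.0, 1.5, 0.1, -1.0])
    print(min_p_filter(logits, min_p=0.8))            # only tokens near the peak keep non-zero mass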
GPT_SoVITS/inference_webui.py

@@ -311,7 +311,7 @@ def merge_short_text_in_array(texts, threshold):
             result[len(result) - 1] += text
     return result

-def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free = False):
+def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, min_p=0.8, top_p=1.0, temperature=0.6, ref_free = False):
     if prompt_text is None or len(prompt_text) == 0:
         ref_free = True
     t0 = ttime()
@@ -399,6 +399,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
             bert,
             # prompt_phone_len=ph_offset,
             top_k=top_k,
+            min_p=min_p,
             top_p=top_p,
             temperature=temperature,
             early_stop_num=hz * max_sec,
@@ -586,6 +587,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
         with gr.Row():
             gr.Markdown(value=i18n("gpt采样参数(无参考文本时不要太低):"))
             top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
+            min_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("min_p"),value=0.8,interactive=True)
             top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
             temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
         inference_button = gr.Button(i18n("合成语音"), variant="primary")
@@ -593,7 +595,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:

     inference_button.click(
         get_tts_wav,
-        [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free],
+        [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, min_p, top_p, temperature, ref_text_free],
         [output],
     )
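Gradio passes the components in the inputs list to the handler positionally, so the list in inference_button.click has to stay in the same order as get_tts_wav's parameters; that is why min_p is inserted between top_k and top_p both in the signature and in the wiring above. A minimal sketch of the same pattern with a trimmed-down, hypothetical handler (not the WebUI's real code):

    import gradio as gr

    def synthesize(text, top_k, min_p, top_p, temperature):
        # placeholder handler: just echo the sampling settings back
        return f"text={text!r}, top_k={top_k}, min_p={min_p}, top_p={top_p}, temperature={temperature}"

    with gr.Blocks() as demo:
        text = gr.Textbox(label="text")
        top_k = gr.Slider(1, 100, step=1, value=5, label="top_k")
        min_p = gr.Slider(0, 1, step=0.05, value=0.8, label="min_p")
        top_p = gr.Slider(0, 1, step=0.05, value=1, label="top_p")
        temperature = gr.Slider(0, 1, step=0.05, value=1, label="temperature")
        out = gr.Textbox(label="output")
        run = gr.Button("run")
        # the order of this list must match the positional parameters of `synthesize`
        run.click(synthesize, [text, top_k, min_p, top_p, temperature], [out])

    # demo.launch()  # uncomment to serve the UI locally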