diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
index 2aede2b..18ce2e6 100644
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -576,22 +576,22 @@ class TTS:
Args:
inputs (dict):
{
- "text": "", # str. text to be synthesized
- "text_lang: "", # str. language of the text to be synthesized
- "ref_audio_path": "", # str. reference audio path
- "prompt_text": "", # str. prompt text for the reference audio
- "prompt_lang": "", # str. language of the prompt text for the reference audio
- "top_k": 5, # int. top k sampling
- "top_p": 1, # float. top p sampling
- "temperature": 1, # float. temperature for sampling
- "text_split_method": "", # str. text split method, see text_segmentaion_method.py for details.
- "batch_size": 1, # int. batch size for inference
- "batch_threshold": 0.75, # float. threshold for batch splitting.
- "split_bucket: True, # bool. whether to split the batch into multiple buckets.
- "return_fragment": False, # bool. step by step return the audio fragment.
- "speed_factor":1.0, # float. control the speed of the synthesized audio.
- "fragment_interval":0.3, # float. to control the interval of the audio fragment.
- "seed": -1, # int. random seed for reproducibility.
+ "text": "", # str.(required) text to be synthesized
+ "text_lang: "", # str.(required) language of the text to be synthesized
+ "ref_audio_path": "", # str.(required) reference audio path
+ "prompt_text": "", # str.(optional) prompt text for the reference audio
+ "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
+ "top_k": 5, # int. top k sampling
+ "top_p": 1, # float. top p sampling
+ "temperature": 1, # float. temperature for sampling
+ "text_split_method": "cut0", # str. text split method, see text_segmentaion_method.py for details.
+ "batch_size": 1, # int. batch size for inference
+ "batch_threshold": 0.75, # float. threshold for batch splitting.
+ "split_bucket: True, # bool. whether to split the batch into multiple buckets.
+ "return_fragment": False, # bool. step by step return the audio fragment.
+ "speed_factor":1.0, # float. control the speed of the synthesized audio.
+ "fragment_interval":0.3, # float. to control the interval of the audio fragment.
+ "seed": -1, # int. random seed for reproducibility.
}
returns:
tulpe[int, np.ndarray]: sampling rate and audio data.
@@ -606,7 +606,7 @@ class TTS:
top_k:int = inputs.get("top_k", 5)
top_p:float = inputs.get("top_p", 1)
temperature:float = inputs.get("temperature", 1)
- text_split_method:str = inputs.get("text_split_method", "")
+ text_split_method:str = inputs.get("text_split_method", "cut0")
batch_size = inputs.get("batch_size", 1)
batch_threshold = inputs.get("batch_threshold", 0.75)
speed_factor = inputs.get("speed_factor", 1.0)
@@ -824,16 +824,13 @@ class TTS:
if not return_fragment:
print("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t_34, t_45))
- yield [
- self.audio_postprocess(audio,
+ yield self.audio_postprocess(audio,
self.configs.sampling_rate,
batch_index_list,
speed_factor,
split_bucket,
fragment_interval
- ),
- f"text: {text} text_lang: {text_lang} prompt_text: {prompt_text} prompt_lang: {prompt_lang} top_k: {top_k} top_p: {top_p} temperature: {temperature} batch_size: {batch_size} batch_threshold: {batch_threshold} split_bucket: {split_bucket} return_fragment: {return_fragment} speed_factor: {speed_factor} fragment_interval: {fragment_interval} seed: {actual_seed}"
- ]
+ )
except Exception as e:
traceback.print_exc()
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 3355e91..199948c 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -6,6 +6,7 @@
全部按英文识别
全部按日文识别
'''
+import random
import os, sys
now_dir = os.getcwd()
sys.path.append(now_dir)
@@ -94,6 +95,7 @@ def inference(text, text_lang,
split_bucket,fragment_interval,
seed,
):
+ actual_seed = seed if seed not in [-1, "", None] else random.randrange(1 << 32)
inputs={
"text": text,
"text_lang": dict_language[text_lang],
@@ -109,11 +111,10 @@ def inference(text, text_lang,
"split_bucket":split_bucket,
"return_fragment":False,
"fragment_interval":fragment_interval,
- "seed":seed,
+ "seed":actual_seed,
}
-
for item in tts_pipline.run(inputs):
- yield item
+ yield item, actual_seed
def custom_sort_key(s):
# 使用正则表达式提取字符串中的数字部分和非数字部分
@@ -211,8 +212,6 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.Row():
inference_button = gr.Button(i18n("合成语音"), variant="primary")
stop_infer = gr.Button(i18n("终止合成"), variant="primary")
- with gr.Row():
- inference_details = gr.Markdown()
inference_button.click(
@@ -226,7 +225,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
split_bucket,fragment_interval,
seed
],
- [output, inference_details],
+ [output, seed],
)
stop_infer.click(tts_pipline.stop, [], [])