diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py index 2aede2b..18ce2e6 100644 --- a/GPT_SoVITS/TTS_infer_pack/TTS.py +++ b/GPT_SoVITS/TTS_infer_pack/TTS.py @@ -576,22 +576,22 @@ class TTS: Args: inputs (dict): { - "text": "", # str. text to be synthesized - "text_lang: "", # str. language of the text to be synthesized - "ref_audio_path": "", # str. reference audio path - "prompt_text": "", # str. prompt text for the reference audio - "prompt_lang": "", # str. language of the prompt text for the reference audio - "top_k": 5, # int. top k sampling - "top_p": 1, # float. top p sampling - "temperature": 1, # float. temperature for sampling - "text_split_method": "", # str. text split method, see text_segmentaion_method.py for details. - "batch_size": 1, # int. batch size for inference - "batch_threshold": 0.75, # float. threshold for batch splitting. - "split_bucket: True, # bool. whether to split the batch into multiple buckets. - "return_fragment": False, # bool. step by step return the audio fragment. - "speed_factor":1.0, # float. control the speed of the synthesized audio. - "fragment_interval":0.3, # float. to control the interval of the audio fragment. - "seed": -1, # int. random seed for reproducibility. + "text": "", # str.(required) text to be synthesized + "text_lang": "", # str.(required) language of the text to be synthesized + "ref_audio_path": "", # str.(required) reference audio path + "prompt_text": "", # str.(optional) prompt text for the reference audio + "prompt_lang": "", # str.(required) language of the prompt text for the reference audio + "top_k": 5, # int. top k sampling + "top_p": 1, # float. top p sampling + "temperature": 1, # float. temperature for sampling + "text_split_method": "cut0", # str. text split method, see text_segmentaion_method.py for details. + "batch_size": 1, # int. batch size for inference + "batch_threshold": 0.75, # float. threshold for batch splitting. + "split_bucket": True, # bool. 
whether to split the batch into multiple buckets. + "return_fragment": False, # bool. step by step return the audio fragment. + "speed_factor":1.0, # float. control the speed of the synthesized audio. + "fragment_interval":0.3, # float. to control the interval of the audio fragment. + "seed": -1, # int. random seed for reproducibility. } returns: tulpe[int, np.ndarray]: sampling rate and audio data. @@ -606,7 +606,7 @@ class TTS: top_k:int = inputs.get("top_k", 5) top_p:float = inputs.get("top_p", 1) temperature:float = inputs.get("temperature", 1) - text_split_method:str = inputs.get("text_split_method", "") + text_split_method:str = inputs.get("text_split_method", "cut0") batch_size = inputs.get("batch_size", 1) batch_threshold = inputs.get("batch_threshold", 0.75) speed_factor = inputs.get("speed_factor", 1.0) @@ -824,16 +824,13 @@ class TTS: if not return_fragment: print("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t_34, t_45)) - yield [ - self.audio_postprocess(audio, + yield self.audio_postprocess(audio, self.configs.sampling_rate, batch_index_list, speed_factor, split_bucket, fragment_interval - ), - f"text: {text} text_lang: {text_lang} prompt_text: {prompt_text} prompt_lang: {prompt_lang} top_k: {top_k} top_p: {top_p} temperature: {temperature} batch_size: {batch_size} batch_threshold: {batch_threshold} split_bucket: {split_bucket} return_fragment: {return_fragment} speed_factor: {speed_factor} fragment_interval: {fragment_interval} seed: {actual_seed}" - ] + ) except Exception as e: traceback.print_exc() diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 3355e91..199948c 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -6,6 +6,7 @@ 全部按英文识别 全部按日文识别 ''' +import random import os, sys now_dir = os.getcwd() sys.path.append(now_dir) @@ -94,6 +95,7 @@ def inference(text, text_lang, split_bucket,fragment_interval, seed, ): + actual_seed = seed if seed not in [-1, "", None] else random.randrange(1 << 
32) inputs={ "text": text, "text_lang": dict_language[text_lang], @@ -109,11 +111,10 @@ def inference(text, text_lang, "split_bucket":split_bucket, "return_fragment":False, "fragment_interval":fragment_interval, - "seed":seed, + "seed":actual_seed, } - for item in tts_pipline.run(inputs): - yield item + yield item, actual_seed def custom_sort_key(s): # 使用正则表达式提取字符串中的数字部分和非数字部分 @@ -211,8 +212,6 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: with gr.Row(): inference_button = gr.Button(i18n("合成语音"), variant="primary") stop_infer = gr.Button(i18n("终止合成"), variant="primary") - with gr.Row(): - inference_details = gr.Markdown() inference_button.click( @@ -226,7 +225,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: split_bucket,fragment_interval, seed ], - [output, inference_details], + [output, seed], ) stop_infer.click(tts_pipline.stop, [], [])