mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-05-01 23:32:33 +08:00
添加音频倍速支持: GPT_SoVITS/TTS_infer_pack/TTS.py
添加音频倍速支持: GPT_SoVITS/inference_webui.py
This commit is contained in:
parent
7556e8cc96
commit
61453b59b2
@ -1,5 +1,6 @@
|
|||||||
import os, sys
|
import os, sys
|
||||||
|
|
||||||
|
import ffmpeg
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
import os
|
import os
|
||||||
@ -405,7 +406,8 @@ class TTS:
|
|||||||
"temperature": 0.6,
|
"temperature": 0.6,
|
||||||
"text_split_method": "",
|
"text_split_method": "",
|
||||||
"batch_size": 1,
|
"batch_size": 1,
|
||||||
"batch_threshold": 0.75
|
"batch_threshold": 0.75,
|
||||||
|
"speed_factor":1.0,
|
||||||
}
|
}
|
||||||
returns:
|
returns:
|
||||||
tuple[int, np.ndarray]: sampling rate and audio data.
|
tuple[int, np.ndarray]: sampling rate and audio data.
|
||||||
@ -421,6 +423,7 @@ class TTS:
|
|||||||
text_split_method:str = inputs.get("text_split_method", "")
|
text_split_method:str = inputs.get("text_split_method", "")
|
||||||
batch_size = inputs.get("batch_size", 1)
|
batch_size = inputs.get("batch_size", 1)
|
||||||
batch_threshold = inputs.get("batch_threshold", 0.75)
|
batch_threshold = inputs.get("batch_threshold", 0.75)
|
||||||
|
speed_factor = inputs.get("speed_factor", 1.0)
|
||||||
|
|
||||||
no_prompt_text = False
|
no_prompt_text = False
|
||||||
if prompt_text in [None, ""]:
|
if prompt_text in [None, ""]:
|
||||||
@ -548,7 +551,34 @@ class TTS:
|
|||||||
|
|
||||||
audio = self.recovery_order(audio, batch_index_list)
|
audio = self.recovery_order(audio, batch_index_list)
|
||||||
print("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t_34, t_45))
|
print("%.3f\t%.3f\t%.3f\t%.3f" % (t1 - t0, t2 - t1, t_34, t_45))
|
||||||
yield self.configs.sampling_rate, (np.concatenate(audio, 0) * 32768).astype(
|
|
||||||
np.int16
|
|
||||||
)
|
|
||||||
|
|
||||||
|
audio = np.concatenate(audio, 0)
|
||||||
|
audio = (audio * 32768).astype(np.int16)
|
||||||
|
if speed_factor != 1.0:
|
||||||
|
audio = speed_change(audio, speed=speed_factor, sr=int(self.configs.sampling_rate))
|
||||||
|
|
||||||
|
yield self.configs.sampling_rate, audio
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def speed_change(input_audio: np.ndarray, speed: float, sr: int) -> np.ndarray:
    """Time-stretch mono int16 PCM audio by `speed` using ffmpeg's atempo filter.

    Args:
        input_audio: 1-D mono audio samples; cast to int16 before piping to ffmpeg.
        speed: tempo factor (>1.0 = faster/shorter, <1.0 = slower/longer).
            Must be > 0.
        sr: sampling rate of `input_audio` in Hz (unchanged by this function).

    Returns:
        np.ndarray: the tempo-adjusted int16 samples at the same sampling rate.

    Raises:
        ValueError: if `speed` is not positive.
    """
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed}")

    # Convert the NumPy array to a raw PCM byte stream for ffmpeg's stdin.
    raw_audio = input_audio.astype(np.int16).tobytes()

    # Feed raw s16le PCM through a pipe; ac=1 — assumes mono input (the TTS
    # pipeline emits mono). NOTE(review): multi-channel input would be
    # misinterpreted here — confirm callers never pass interleaved stereo.
    stream = ffmpeg.input('pipe:', format='s16le', acodec='pcm_s16le',
                          ar=str(sr), ac=1)

    # ffmpeg's atempo filter only accepts factors in [0.5, 2.0] (newer builds
    # allow up to 100, but still reject < 0.5). The webui slider goes from
    # 0.25 to 4.0, so chain multiple atempo stages to cover the full range,
    # e.g. 4.0 -> atempo=2.0,atempo=2.0 and 0.25 -> atempo=0.5,atempo=0.5.
    remaining = speed
    while remaining > 2.0:
        stream = stream.filter('atempo', 2.0)
        remaining /= 2.0
    while remaining < 0.5:
        stream = stream.filter('atempo', 0.5)
        remaining /= 0.5
    stream = stream.filter('atempo', remaining)

    # Run ffmpeg, writing the processed PCM back out through a pipe.
    out, _ = (
        stream.output('pipe:', format='s16le', acodec='pcm_s16le')
        .run(input=raw_audio, capture_stdout=True, capture_stderr=True)
    )

    # Decode the piped output back into a NumPy int16 array.
    processed_audio = np.frombuffer(out, np.int16)

    return processed_audio
|
@ -68,7 +68,7 @@ tts_pipline = TTS(tts_config)
|
|||||||
gpt_path = tts_config.t2s_weights_path
|
gpt_path = tts_config.t2s_weights_path
|
||||||
sovits_path = tts_config.vits_weights_path
|
sovits_path = tts_config.vits_weights_path
|
||||||
|
|
||||||
def inference(text, text_lang, ref_audio_path, prompt_text, prompt_lang, top_k, top_p, temperature, text_split_method, batch_size):
|
def inference(text, text_lang, ref_audio_path, prompt_text, prompt_lang, top_k, top_p, temperature, text_split_method, batch_size, speed_factor):
|
||||||
inputs={
|
inputs={
|
||||||
"text": text,
|
"text": text,
|
||||||
"text_lang": dict_language[text_lang],
|
"text_lang": dict_language[text_lang],
|
||||||
@ -80,6 +80,7 @@ def inference(text, text_lang, ref_audio_path, prompt_text, prompt_lang, top_k,
|
|||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"text_split_method": cut_method[text_split_method],
|
"text_split_method": cut_method[text_split_method],
|
||||||
"batch_size":int(batch_size),
|
"batch_size":int(batch_size),
|
||||||
|
"speed_factor":float(speed_factor)
|
||||||
}
|
}
|
||||||
yield next(tts_pipline.run(inputs))
|
yield next(tts_pipline.run(inputs))
|
||||||
|
|
||||||
@ -154,6 +155,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
with gr.Row():
|
with gr.Row():
|
||||||
gr.Markdown(value=i18n("gpt采样参数(无参考文本时不要太低):"))
|
gr.Markdown(value=i18n("gpt采样参数(无参考文本时不要太低):"))
|
||||||
batch_size = gr.Slider(minimum=1,maximum=20,step=1,label=i18n("batch_size"),value=1,interactive=True)
|
batch_size = gr.Slider(minimum=1,maximum=20,step=1,label=i18n("batch_size"),value=1,interactive=True)
|
||||||
|
speed_factor = gr.Slider(minimum=0.25,maximum=4,step=0.05,label="speed_factor",value=1.0,interactive=True)
|
||||||
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
|
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
|
||||||
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
|
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
|
||||||
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
|
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
|
||||||
@ -165,7 +167,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
|||||||
|
|
||||||
inference_button.click(
|
inference_button.click(
|
||||||
inference,
|
inference,
|
||||||
[text,text_language, inp_ref, prompt_text, prompt_language, top_k, top_p, temperature, how_to_cut, batch_size],
|
[text,text_language, inp_ref, prompt_text, prompt_language, top_k, top_p, temperature, how_to_cut, batch_size, speed_factor],
|
||||||
[output],
|
[output],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user