fix 24k to 48k inference

fix 24k to 48k inference
This commit is contained in:
RVC-Boss 2025-02-27 19:05:54 +08:00 committed by GitHub
parent af80e8f113
commit 060a0d91dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -519,7 +519,10 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
     t = []
     if prompt_text is None or len(prompt_text) == 0:
         ref_free = True
-    if model_version=="v3":ref_free=False#s2v3暂不支持ref_free
+    if model_version=="v3":
+        ref_free=False#s2v3暂不支持ref_free
+    else:
+        if_sr=False
     t0 = ttime()
     prompt_language = dict_language[prompt_language]
     text_language = dict_language[text_language]
@@ -636,7 +639,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
                     except:
                         traceback.print_exc()
             if(len(refers)==0):refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)]
-            audio = (vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers,speed=speed).detach().cpu().numpy()[0, 0])
+            audio = vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers,speed=speed)[0][0]#.cpu().detach().numpy()
         else:
             refer = get_spepc(hps, ref_wav_path).to(device).to(dtype)#######这里要重采样切到32k,因为src是24k的没有单独的32k的src所以不能改成2个路径
             phoneme_ids0=torch.LongTensor(phones1).to(device).unsqueeze(0)