stream_v2pro: 修复由于 spectrogram_torch 输入是 half 导致 spec 溢出最终没有声音的问题

This commit is contained in:
csh 2025-08-18 16:25:38 +08:00
parent 6d82af146b
commit 60f07ea36e

View File

@ -162,13 +162,14 @@ class StepVitsModel(nn.Module):
def ref_handle(self, ref_audio_32k):
    """Build the reference spectrogram and the ``self.sv`` embedding.

    The spectrogram is computed on a float32 copy of the input and then
    cast back to the input's dtype: feeding a half-precision tensor
    straight into ``spectrogram_torch`` overflowed the spectrogram and
    produced silent output.

    Args:
        ref_audio_32k: Reference waveform tensor. Presumably sampled at
            32 kHz, judging by the name and the ``32000`` passed to
            ``resamplex`` -- TODO confirm against callers.

    Returns:
        A ``(refer, sv_emb)`` tuple: the spectrogram cast to the dtype of
        ``ref_audio_32k``, and whatever ``self.sv`` returns for the
        resampled audio.
    """
    target_dtype = ref_audio_32k.dtype
    target_device = ref_audio_32k.device
    data_cfg = self.hps.data

    # Run the spectrogram in float32 so half-precision inputs do not overflow.
    spec = spectrogram_torch(
        self.hann_window,
        ref_audio_32k.float(),
        data_cfg.filter_length,
        data_cfg.sampling_rate,
        data_cfg.hop_length,
        data_cfg.win_length,
        center=False,
    )
    # Hand the result back in the caller's original precision.
    refer = spec.to(target_dtype)

    # resamplex(audio, 32000, 16000): presumably 32 kHz -> 16 kHz; the result
    # is moved back to the input's dtype and device before calling self.sv.
    ref_audio_16k = resamplex(ref_audio_32k, 32000, 16000)
    ref_audio_16k = ref_audio_16k.to(target_dtype).to(target_device)
    sv_emb = self.sv(ref_audio_16k)

    return refer, sv_emb