From a32a2b893436fad56cc82409121c7fa36a1815d5 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:50:35 +0800 Subject: [PATCH] =?UTF-8?q?v3sovits=E6=A8=A1=E5=9E=8B=E6=8E=A8=E7=90=86?= =?UTF-8?q?=E6=94=AF=E6=8C=81webui=E4=BC=A0=E8=AF=AD=E9=80=9F=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E8=B0=83=E6=95=B4=E5=90=88=E6=88=90=E8=AF=AD=E9=80=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v3sovits模型推理支持webui传语速参数调整合成语速 --- GPT_SoVITS/module/models.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/GPT_SoVITS/module/models.py b/GPT_SoVITS/module/models.py index 338e88d..623da80 100644 --- a/GPT_SoVITS/module/models.py +++ b/GPT_SoVITS/module/models.py @@ -1251,7 +1251,7 @@ class SynthesizerTrnV3(nn.Module): return cfm_loss @torch.no_grad() - def decode_encp(self, codes,text, refer,ge=None): + def decode_encp(self, codes,text, refer,ge=None,speed=1): # print(2333333,refer.shape) # ge=None if(ge==None): @@ -1259,13 +1259,17 @@ class SynthesizerTrnV3(nn.Module): refer_mask = torch.unsqueeze(commons.sequence_mask(refer_lengths, refer.size(2)), 1).to(refer.dtype) ge = self.ref_enc(refer[:,:704] * refer_mask, refer_mask) y_lengths = torch.LongTensor([int(codes.size(2)*2)]).to(codes.device) - y_lengths1 = torch.LongTensor([int(codes.size(2)*2.5*1.5)]).to(codes.device) + if speed==1: + sizee=int(codes.size(2)*2.5*1.5) + else: + sizee=int(codes.size(2)*2.5*1.5/speed)+1 + y_lengths1 = torch.LongTensor([sizee]).to(codes.device) text_lengths = torch.LongTensor([text.size(-1)]).to(text.device) quantized = self.quantizer.decode(codes) if self.semantic_frame_rate == '25hz': quantized = F.interpolate(quantized, scale_factor=2, mode="nearest")##BCT - x, m_p, logs_p, y_mask = self.enc_p(quantized, y_lengths, text, text_lengths, ge) + x, m_p, logs_p, y_mask = self.enc_p(quantized, y_lengths, text, text_lengths, ge,speed) fea=self.bridge(x) fea = F.interpolate(fea, scale_factor=1.875, mode="nearest")##BCT ####more wn paramter to learn mel