From d2b5db9f2c00f3328cb2a6bff48b9c79f9c6f98e Mon Sep 17 00:00:00 2001 From: csh <458761603@qq.com> Date: Thu, 29 May 2025 02:42:22 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E5=90=8D=20export=5Ftorch=5Fscript=5F?= =?UTF-8?q?v3.py=20=E4=B8=BA=20export=5Ftorch=5Fscript=5Fv3v4.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...{export_torch_script_v3.py => export_torch_script_v3v4.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename GPT_SoVITS/{export_torch_script_v3.py => export_torch_script_v3v4.py} (99%) diff --git a/GPT_SoVITS/export_torch_script_v3.py b/GPT_SoVITS/export_torch_script_v3v4.py similarity index 99% rename from GPT_SoVITS/export_torch_script_v3.py rename to GPT_SoVITS/export_torch_script_v3v4.py index dd8464be..0a9c2a19 100644 --- a/GPT_SoVITS/export_torch_script_v3.py +++ b/GPT_SoVITS/export_torch_script_v3v4.py @@ -473,7 +473,7 @@ class GPTSoVITSV4(torch.nn.Module): # 因为导出的模型在不同shape时会重新编译还是怎么的,会卡顿10s这样, # 所以在这里补0让他shape维持不变 # 但是这样会导致生成的音频长度不对,所以在最后截取一下。 - # 经过 bigvgan 之后音频长度就是 fea_todo.shape[2] * 256 + # 经过 hifigan 之后音频长度就是 fea_todo.shape[2] * 480 complete_len = chunk_len - fea_todo_chunk.shape[-1] if complete_len != 0: fea_todo_chunk = torch.cat( @@ -1090,7 +1090,7 @@ def test_export1( bert1 = bert1.T.to(device) bert2 = bert2.T.to(device) - top_k = torch.LongTensor([15]).to(device) + top_k = torch.LongTensor([20]).to(device) current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") logger.info("start inference %s", current_time)