From 14191901cdb7e791d8fee1ff31dffe107f9e28fb Mon Sep 17 00:00:00 2001 From: huang yutong <3565724239@qq.com> Date: Sat, 18 Apr 2026 17:10:56 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=A4=9A=E4=B8=AA?= =?UTF-8?q?=E6=A8=A1=E5=9D=97=E4=B8=AD=E7=9A=84=E7=8B=AC=E7=AB=8B=20bug=20?= =?UTF-8?q?(#2755)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 修复 sync_buffer 中除以函数对象而非调用结果(distrib.py) - `buffer.data /= world_size` 中 world_size 是函数,缺少 (), 导致 TypeError 使分布式训练 buffer 同步失败 2. 修复 istft 函数缺少 return 语句(spec_utils.py) - 函数计算了结果但未返回,调用者始终得到 None 3. 修复 cut0 返回字面量 "/n" 而非换行符 "\n"(text_segmentation_method.py) - 导致后续 text.split("\n") 无法正确切分,字面 /n 被当作文本内容 4. 修复粤语 ASR 的 vad/punc model_revision 被无条件覆盖(funasr_asr.py) - 粤语分支将 vad_model_revision 设为空(因不使用 VAD/标点模型), 但 if/else 外的赋值将其覆盖为 "v2.0.4",传入错误的 revision 参数 Made-with: Cursor --- GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py | 2 +- GPT_SoVITS/module/distrib.py | 2 +- tools/asr/funasr_asr.py | 3 +-- tools/uvr5/lib/lib_v5/spec_utils.py | 2 ++ 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py index fda70a49..cf28f3fb 100644 --- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py +++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py @@ -92,7 +92,7 @@ def cut0(inp): if not set(inp).issubset(punctuation): return inp else: - return "/n" + return "\n" # 凑四句一切 diff --git a/GPT_SoVITS/module/distrib.py b/GPT_SoVITS/module/distrib.py index cabf8f8a..e96f8e00 100644 --- a/GPT_SoVITS/module/distrib.py +++ b/GPT_SoVITS/module/distrib.py @@ -87,7 +87,7 @@ def sync_buffer(buffers, average=True): for buffer, handle in handles: handle.wait() if average: - buffer.data /= world_size + buffer.data /= world_size() def sync_grad(params): diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index 6a5c9989..b4ddce9e 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -39,6 +39,7 @@ def create_model(language="zh"): local_dir="tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", ) model_revision = "v2.0.4" + vad_model_revision = punc_model_revision = "v2.0.4" elif language == "yue": path_asr = "tools/asr/models/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online" snapshot_download( @@ -51,8 +52,6 @@ def create_model(language="zh"): else: raise ValueError(f"{language} is not supported") - vad_model_revision = punc_model_revision = "v2.0.4" - if language in funasr_models: return funasr_models[language] else: diff --git a/tools/uvr5/lib/lib_v5/spec_utils.py b/tools/uvr5/lib/lib_v5/spec_utils.py index 4d987cd8..d2d2bf34 100644 --- a/tools/uvr5/lib/lib_v5/spec_utils.py +++ b/tools/uvr5/lib/lib_v5/spec_utils.py @@ -485,6 +485,8 @@ def istft(spec, hl): wave_right = librosa.istft(spec_right, hop_length=hl) wave = np.asfortranarray([wave_left, wave_right]) + return wave + if __name__ == "__main__": import argparse