From 160dfbdd2c88622c88c51b1b16afca59710492e5 Mon Sep 17 00:00:00 2001 From: jmaple12 <93472187+jmaple12@users.noreply.github.com> Date: Thu, 14 Mar 2024 18:14:42 +0800 Subject: [PATCH] Update text_segmentation_method.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 第85行,修改cut1函数,如果文本的句数为11句,则原函数会把句子切分为 4/7而不是4/4/3。模型处理长句子容易出现漏字现象。 第137行函数cut5中的punds的取值做出修改:删除重复的";",新增":;" --- GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py index 2a182b2..eb25610 100644 --- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py +++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py @@ -82,7 +82,8 @@ def cut1(inp): inp = inp.strip("\n") inps = split(inp) split_idx = list(range(0, len(inps), 4)) - split_idx[-1] = None + # split_idx[-1] = None + split_idx.append(None) if len(split_idx) > 1: opts = [] for idx in range(len(split_idx) - 1): @@ -135,7 +136,8 @@ def cut5(inp): # if not re.search(r'[^\w\s]', inp[-1]): # inp += '。' inp = inp.strip("\n") - punds = r'[,.;?!、,。?!;:…]' + # punds = r'[,.;?!、,。?!;:…]' + punds = r'[,.;?!、,。?!;::…]' items = re.split(f'({punds})', inp) mergeitems = ["".join(group) for group in zip(items[::2], items[1::2])] # 在句子不存在符号或句尾无符号的时候保证文本完整 @@ -149,4 +151,4 @@ def cut5(inp): if __name__ == '__main__': method = get_method("cut5") print(method("你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。")) - \ No newline at end of file +