From 6638e6629400cf76706c577a48f5a9ef4a73edf6 Mon Sep 17 00:00:00 2001
From: aoguai <34203474+aoguai@users.noreply.github.com>
Date: Sat, 6 Jul 2024 15:08:54 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E8=A7=A3=E5=86=B3=E6=8C=89=E6=A0=87?=
 =?UTF-8?q?=E7=82=B9=E7=AC=A6=E5=8F=B7=E5=88=87=E5=88=86=E6=97=B6=E5=B0=8F?=
 =?UTF-8?q?=E6=95=B0=E4=BC=9A=E8=A2=AB=E5=88=87=E5=88=86(#1084)=20(#1253)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: 解决按标点符号切分时小数会被切分

* fix: update cut5
---
 GPT_SoVITS/inference_webui.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 44c6d0e..05ef486 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -510,16 +510,26 @@ def cut4(inp):
 
 # contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py
 def cut5(inp):
-    # if not re.search(r'[^\w\s]', inp[-1]):
-    #     inp += '。'
     inp = inp.strip("\n")
-    punds = r'[,.;?!、,。?!;:…]'
-    items = re.split(f'({punds})', inp)
-    mergeitems = ["".join(group) for group in zip(items[::2], items[1::2])]
-    # 在句子不存在符号或句尾无符号的时候保证文本完整
-    if len(items)%2 == 1:
-        mergeitems.append(items[-1])
-    opt = [item for item in mergeitems if not set(item).issubset(punctuation)]
+    punds = {',', '.', ';', '?', '!', '、', ',', '。', '?', '!', ';', ':', '…'}
+    mergeitems = []
+    items = []
+
+    for i, char in enumerate(inp):
+        if char in punds:
+            if char == '.' and i > 0 and i < len(inp) - 1 and inp[i - 1].isdigit() and inp[i + 1].isdigit():
+                items.append(char)
+            else:
+                items.append(char)
+                mergeitems.append("".join(items))
+                items = []
+        else:
+            items.append(char)
+
+    if items:
+        mergeitems.append("".join(items))
+
+    opt = [item for item in mergeitems if not set(item).issubset(punds)]
     return "\n".join(opt)
 
 