fix: 解决按标点符号切分时小数会被切分(#1084) (#1253)

* fix: 解决按标点符号切分时小数会被切分

* fix: update cut5
This commit is contained in:
aoguai 2024-07-06 15:08:54 +08:00 committed by GitHub
parent 95354647c8
commit 6638e66294
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -510,16 +510,26 @@ def cut4(inp):
# contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py # contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py
def cut5(inp): def cut5(inp):
# if not re.search(r'[^\w\s]', inp[-1]):
# inp += '。'
inp = inp.strip("\n") inp = inp.strip("\n")
punds = r'[,.;?!、,。?!;:…]' punds = {',', '.', ';', '?', '!', '', '', '', '', '', ';', '', ''}
items = re.split(f'({punds})', inp) mergeitems = []
mergeitems = ["".join(group) for group in zip(items[::2], items[1::2])] items = []
# 在句子不存在符号或句尾无符号的时候保证文本完整
if len(items)%2 == 1: for i, char in enumerate(inp):
mergeitems.append(items[-1]) if char in punds:
opt = [item for item in mergeitems if not set(item).issubset(punctuation)] if char == '.' and i > 0 and i < len(inp) - 1 and inp[i - 1].isdigit() and inp[i + 1].isdigit():
items.append(char)
else:
items.append(char)
mergeitems.append("".join(items))
items = []
else:
items.append(char)
if items:
mergeitems.append("".join(items))
opt = [item for item in mergeitems if not set(item).issubset(punds)]
return "\n".join(opt) return "\n".join(opt)