@register_method("auto_cut")
def auto_cut(inp):
    """Split *inp* into sentence-sized lines for TTS inference.

    The text is first cut at sentence-ending punctuation (both CJK and
    ASCII forms); each resulting sentence is then further broken after a
    clause separator (comma, 顿号, ellipsis, …) once more than 12
    non-separator characters have accumulated since the last break.

    :param inp: raw input text; surrounding newlines are stripped.
    :return: the segmented text, one chunk per line, joined with "\n".
             Empty or whitespace-only chunks are dropped.
    """
    inp = inp.strip("\n")
    if not inp:
        # Fix: the original indexed inp[-1] unconditionally, raising
        # IndexError on empty input.
        return ""

    # Character class used by re.split, and the same characters as a plain
    # string for the terminator test.  Fix: the original tested membership
    # in the pattern string itself, so a text ending in '[' or ']' skipped
    # the appended terminator and its trailing fragment was silently lost
    # by the pairing below.
    split_pattern = r'[?!。?!~:]'
    terminators = "?!。?!~:"
    if inp[-1] not in terminators:
        # Guarantee a trailing terminator so the text/punctuation pairing
        # below covers the whole input.
        inp += "。"

    # re.split with a capture group keeps the delimiters interleaved with
    # the text; pair each text fragment with the punctuation that ends it.
    pieces = re.split(f'({split_pattern})', inp)
    sentences = ["".join(pair) for pair in zip(pieces[::2], pieces[1::2])]

    def _break_long_clauses(text):
        """Insert '\n' after a clause separator once >12 plain chars built up."""
        # NOTE(review): '——' is two characters, so `char in separators`
        # can never match it — em-dash breaking is effectively disabled.
        # Preserved as-is; confirm whether '—' was intended.
        separators = [',', ',', '、', '——', '…']
        count = 0
        out = []  # list + join instead of quadratic string +=
        for char in text:
            out.append(char)
            if char in separators:
                if count > 12:
                    out.append('\n')
                    count = 0
            else:
                count += 1  # only non-separator characters count toward length
        return "".join(out)

    chunks = [_break_long_clauses(s) for s in sentences]
    return "\n".join(c for c in chunks if c.strip())