From 3e8c071e9aecc532cd0a209c65d552e6563c4f44 Mon Sep 17 00:00:00 2001
From: KakaruHayate <97896816+KakaruHayate@users.noreply.github.com>
Date: Tue, 30 Jan 2024 22:37:59 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=97=A5=E8=8B=B1/?=
 =?UTF-8?q?=E4=B8=AD=E8=8B=B1=E6=B7=B7=E5=90=88=E6=8E=A8=E7=90=86=E5=88=86?=
 =?UTF-8?q?=E8=AF=8D=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

示例
【1pass】(现在版本)
['我是一名粉刷匠,粉刷本领强,', 'I am a painter with strong painting skills', ',', 'A.B.C.D.A', '_', 'B', '—', 'C', '……', 'D.']
['zh', 'en', 'zh', 'en', 'zh', 'en', 'zh', 'en', 'zh', 'en']
【2pass】
['我是一名粉刷匠,粉刷本领强,', 'I am a painter with strong painting skills,', 'A.B.C.D.A_', 'B—', 'C……', 'D.']
['zh', 'en', 'en', 'en', 'en', 'en']
【result】
['我是一名粉刷匠,粉刷本领强,', 'I am a painter with strong painting skills,A.B.C.D.A_B—C……D.']
['zh', 'en']
---
 GPT_SoVITS/inference_webui.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 917bcbc..a419e9e 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -215,6 +215,21 @@ def splite_en_inf(sentence, language):
     if pos < len(sentence):
         textlist.append(sentence[pos:])
         langlist.append(language)
+    # Merge punctuation into previous word
+    for i in range(len(textlist)-1, 0, -1):
+        if re.match(r'^[\W_]+$', textlist[i]):
+            textlist[i-1] += textlist[i]
+            del textlist[i]
+            del langlist[i]
+    # Merge consecutive words with the same language tag
+    i = 0
+    while i < len(langlist) - 1:
+        if langlist[i] == langlist[i+1]:
+            textlist[i] += textlist[i+1]
+            del textlist[i+1]
+            del langlist[i+1]
+        else:
+            i += 1
 
     return textlist, langlist
 

From d6629a8c6cb9db265d8b22df74150c9dccc47242 Mon Sep 17 00:00:00 2001
From: KakaruHayate <97896816+KakaruHayate@users.noreply.github.com>
Date: Tue, 30 Jan 2024 22:56:00 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E7=AC=AC=E4=B8=80=E9=81=8D=E6=AD=A3?=
 =?UTF-8?q?=E5=88=99=E5=8C=B9=E9=85=8D=E4=B8=8D=E6=9F=A5=E6=89=BE'.'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 GPT_SoVITS/inference_webui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index a419e9e..d3b5991 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -200,7 +200,7 @@ dict_language = {
 
 
 def splite_en_inf(sentence, language):
-    pattern = re.compile(r'[a-zA-Z. ]+')
+    pattern = re.compile(r'[a-zA-Z ]+')
     textlist = []
     langlist = []
     pos = 0