Merge d09022ee6fe5b07b6a2516012666d24485b21d57 into 5dfce9a3f0def7f1ee1e075df569b0b2d41df9e3

This commit is contained in:
jmaple12 2024-08-21 13:41:43 -07:00 committed by GitHub
commit 9a3fa8f8af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -74,23 +74,28 @@ class TextPreprocessor:
def pre_seg_text(self, text:str, lang:str, text_split_method:str):
text = text.strip("\n")
if (text[0] not in splits and len(get_first(text)) < 4):
text = "" + text if lang != "en" else "." + text
#防止text成为空字符串
#如果字符里面没有文字,视其为空字符串
if not re.sub('\W','', text):
text =''
if text:
if (text[0] not in splits and len(get_first(text)) < 4):
text = "" + text if lang != "en" else "." + text
print(i18n("实际输入的目标文本:"))
print(text)
seg_method = get_seg_method(text_split_method)
text = seg_method(text)
while "\n\n" in text:
text = text.replace("\n\n", "\n")
# while "\n\n" in text:
# text = text.replace("\n\n", "\n")
text = re.sub(r'\n+','\n', text)
_texts = text.split("\n")
_texts = self.process_text(_texts)
_texts = merge_short_text_in_array(_texts, 5)
texts = []
for text in _texts:
# 解决输入目标文本的空行导致报错的问题
if (len(text.strip()) == 0):
@@ -105,7 +110,7 @@ class TextPreprocessor:
texts.extend(split_big_text(text))
else:
texts.append(text)
print(i18n("实际输入的目标文本(切句后):"))
print(texts)
return texts