调整一下判断纯符号所处的函数,更符合逻辑

This commit is contained in:
CyberWon 2024-06-11 17:44:12 +08:00 committed by GitHub
parent 75c7f1d971
commit c32d23fe5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -61,9 +61,6 @@ class TextPreprocessor:
result = [] result = []
print(i18n("############ 提取文本Bert特征 ############")) print(i18n("############ 提取文本Bert特征 ############"))
for text in tqdm(texts): for text in tqdm(texts):
if not re.sub("\W+", "", text):
# 检测一下,如果是纯符号,就跳过。
continue
phones, bert_features, norm_text = self.segment_and_extract_feature_for_text(text, lang) phones, bert_features, norm_text = self.segment_and_extract_feature_for_text(text, lang)
if phones is None: if phones is None:
continue continue
@@ -97,6 +94,9 @@ class TextPreprocessor:
for text in _texts: for text in _texts:
# 解决输入目标文本的空行导致报错的问题 # 解决输入目标文本的空行导致报错的问题
if (len(text.strip()) == 0): if (len(text.strip()) == 0):
continue
if not re.sub("\W+", "", text):
# 检测一下,如果是纯符号,就跳过。
continue continue
if (text[-1] not in splits): text += "" if lang != "en" else "." if (text[-1] not in splits): text += "" if lang != "en" else "."