Update TextPreprocessor.py

删除punctuation中的“ ”,以便于按英文句号切分
This commit is contained in:
Cosmo Clara 2024-10-18 01:05:07 +08:00 committed by GitHub
parent 395f97a176
commit 5aeb544c90
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -20,7 +20,7 @@ from tools.i18n.i18n import I18nAuto, scan_language_list
language=os.environ.get("language","Auto")
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
i18n = I18nAuto(language=language)
-punctuation = set(['!', '?', '…', ',', '.', '-'," "])
+punctuation = set(['!', '?', '…', ',', '.', '-'])
def get_first(text:str) -> str:
pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]"