Fix regex: use a raw string for the character-class pattern in text_normalize

This commit is contained in:
Ziyao Wang 2024-08-01 19:35:30 +08:00 committed by GitHub
parent ff8a0ea4a0
commit cb6fbdad74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -237,7 +237,7 @@ def text_normalize(text):
text = normalize_numbers(text) text = normalize_numbers(text)
text = ''.join(char for char in unicodedata.normalize('NFD', text) text = ''.join(char for char in unicodedata.normalize('NFD', text)
if unicodedata.category(char) != 'Mn') # Strip accents if unicodedata.category(char) != 'Mn') # Strip accents
text = re.sub("[^ A-Za-z'.,?!\-]", "", text) text = re.sub(r"[^ A-Za-z'.,?!\-]", "", text)
text = re.sub(r"(?i)i\.e\.", "that is", text) text = re.sub(r"(?i)i\.e\.", "that is", text)
text = re.sub(r"(?i)e\.g\.", "for example", text) text = re.sub(r"(?i)e\.g\.", "for example", text)