调整minus识别,防止误识别转化

This commit is contained in:
Cosmo Clara 2024-12-11 02:50:20 +08:00 committed by GitHub
parent f6d0165843
commit 401c275c5a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -237,6 +237,7 @@ def normalize(text):
"""
text = re.sub(_ordinal_number_re, _convert_ordinal, text)
text = re.sub(r'(?<!\d)-|-(?!\d)', ' minus ', text)
text = re.sub(_comma_number_re, _remove_commas, text)
text = re.sub(_time_re, _expand_time, text)
text = re.sub(_measurement_re, _expand_measurement, text)
@@ -252,7 +253,6 @@ def normalize(text):
text = ''.join(char for char in unicodedata.normalize('NFD', text)
if unicodedata.category(char) != 'Mn') # Strip accents
text = re.sub("-", "minus ", text)
text = re.sub("%", " percent", text)
text = re.sub("[^ A-Za-z'.,?!\-]", "", text)
text = re.sub(r"(?i)i\.e\.", "that is", text)