Update english.py

修改代码风格和zh处理保持一致
This commit is contained in:
Cosmo Clara 2024-10-31 18:30:04 +08:00 committed by GitHub
parent 6520951c74
commit 331d176798
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -22,6 +22,14 @@ CMU_DICT_HOT_PATH = os.path.join(current_file_path, "engdict-hot.rep")
CACHE_PATH = os.path.join(current_file_path, "engdict_cache.pickle")
NAMECACHE_PATH = os.path.join(current_file_path, "namedict_cache.pickle")
# Punctuation normalization table: maps punctuation found in the input text
# to the ASCII punctuation expected by the downstream English G2P step.
#
# The last three keys are written as \u escapes on purpose: as literal
# characters they are easily stripped by ASCII-only tooling, which would leave
# three duplicate empty-string keys. An empty key collapses the dict to a
# single "" -> "?" entry, and an empty regex matches at every position, so
# "?" would be inserted between every character of the text.
#
# NOTE(review): the first two keys are regex character classes, so they only
# work when the consumer treats keys as patterns (not re.escape'd literals).
# Their contents may also have lost full-width variants -- confirm upstream.
rep_map = {
    "[;:]": ",",
    '["]': "'",
    "\u3002": ".",  # U+3002 ideographic full stop
    "\uff01": "!",  # U+FF01 fullwidth exclamation mark
    "\uff1f": "?",  # U+FF1F fullwidth question mark
}
arpa = {
"AH0",
"S",
@ -221,15 +229,9 @@ def get_namedict():
def text_normalize(text):
# todo: eng text normalize
# 适配中文及 g2p_en 标点
rep_map = {
"[;:]": ",",
'["]': "'",
"": ".",
"": "!",
"": "?",
}
for p, r in rep_map.items():
text = re.sub(p, r, text)
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
text = pattern.sub(lambda x: rep_map[x.group()], text)
# 来自 g2p_en 文本格式化处理
# 增加大写兼容