Update english.py

修改代码风格和zh处理保持一致
This commit is contained in:
Cosmo Clara 2024-10-31 18:30:04 +08:00 committed by GitHub
parent 6520951c74
commit 331d176798
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -22,6 +22,14 @@ CMU_DICT_HOT_PATH = os.path.join(current_file_path, "engdict-hot.rep")
CACHE_PATH = os.path.join(current_file_path, "engdict_cache.pickle") CACHE_PATH = os.path.join(current_file_path, "engdict_cache.pickle")
NAMECACHE_PATH = os.path.join(current_file_path, "namedict_cache.pickle") NAMECACHE_PATH = os.path.join(current_file_path, "namedict_cache.pickle")
rep_map = {
"[;:]": ",",
'["]': "'",
"。": ".",
"！": "!",
"？": "?",
}
arpa = { arpa = {
"AH0", "AH0",
"S", "S",
@@ -221,15 +229,9 @@ def get_namedict():
def text_normalize(text): def text_normalize(text):
# todo: eng text normalize # todo: eng text normalize
# 适配中文及 g2p_en 标点 # 适配中文及 g2p_en 标点
rep_map = {
"[;:]": ",", pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
'["]': "'", text = pattern.sub(lambda x: rep_map[x.group()], text)
"。": ".",
"！": "!",
"？": "?",
}
for p, r in rep_map.items():
text = re.sub(p, r, text)
# 来自 g2p_en 文本格式化处理 # 来自 g2p_en 文本格式化处理
# 增加大写兼容 # 增加大写兼容