中文多音字逻辑更新

中文多音字逻辑更新
This commit is contained in:
RVC-Boss 2024-08-06 14:06:36 +08:00 committed by GitHub
parent 9f8f365b93
commit 7778412ed2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 45040 additions and 7 deletions

View File

@ -1 +1 @@
from text.g2pw.g2pw import *
from text.g2pw.g2pw import *

View File

@ -13,6 +13,7 @@ from .onnx_api import G2PWOnnxConverter
# Directory that contains this module; the data-file paths below are built from it.
current_file_path = os.path.dirname(__file__)
# Full path of "polyphonic.pickle" in this module's directory.
CACHE_PATH = os.path.join(current_file_path, "polyphonic.pickle")
# Full path of "polyphonic.rep" in this module's directory.
PP_DICT_PATH = os.path.join(current_file_path, "polyphonic.rep")
# Full path of "polyphonic-fix.rep"; the dictionary loader opens it after the
# earlier entries are stored, so a key repeated here replaces the earlier value.
PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep")
class G2PWPinyin(Pinyin):
@ -69,12 +70,6 @@ class Converter(UltimateConverter):
def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
pinyins = []
if han in pp_dict:
phns = pp_dict[han]
for ph in phns:
pinyins.append([ph])
return pinyins
g2pw_pinyin = self._g2pw(han)
if not g2pw_pinyin: # g2pw 不支持的汉字改为使用 pypinyin 原有逻辑
@ -139,7 +134,21 @@ def read_dict():
value = eval(value_str.strip())
polyphonic_dict[key.strip()] = value
line = f.readline()
with open(PP_FIX_DICT_PATH) as f:
line = f.readline()
while line:
key, value_str = line.split(':')
value = eval(value_str.strip())
polyphonic_dict[key.strip()] = value
line = f.readline()
return polyphonic_dict
def correct_pronunciation(word, word_pinyins):
    """Return the pronunciation stored in ``pp_dict`` for ``word``, if any.

    Args:
        word: Key to look up in the module-level ``pp_dict``.
        word_pinyins: Value handed back untouched when ``word`` has no entry.

    Returns:
        ``pp_dict[word]`` when ``word`` is present in ``pp_dict``; otherwise
        the ``word_pinyins`` argument as it was passed in.
    """
    # Guard clause: nothing to override for words without a dictionary entry.
    if word not in pp_dict:
        return word_pinyins
    return pp_dict[word]
# Lookup table consulted by correct_pronunciation(); get_dict() is defined
# outside the lines visible here.
pp_dict = get_dict()

File diff suppressed because it is too large Load Diff