api_v2 支持多音字

This commit is contained in:
fangzhehao 2024-12-09 11:36:33 +08:00
parent 5202a7793d
commit c32813913b
2 changed files with 5 additions and 1 deletion

View File

@@ -117,6 +117,7 @@ class TextPreprocessor:
return self.get_phones_and_bert(text, language, version)
def get_phones_and_bert(self, text:str, language:str, version:str, final:bool=False):
LangSegment.setKeepPinyin(True)
if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
language = language.replace("all_","")
if language == "en":

View File

@@ -177,6 +177,9 @@ def _g2p(segments):
# assert len(sub_initials) == len(sub_finals) == len(word)
initials = sum(initials, [])
finals = sum(finals, [])
initials=[to_initials(customer_pinyin[index]) if customer_pinyin[index] != "" else item for index,item in enumerate(initials)]
finals=[to_finals_tone3(customer_pinyin[index]) if customer_pinyin[index] != "" else item for index,item in enumerate(finals)]
print("customer_pinyin:",customer_pinyin)
print("pypinyin结果",initials,finals)
else:
# g2pw采用整句推理
@@ -215,7 +218,7 @@ def _g2p(segments):
initials = sum(initials, [])
finals = sum(finals, [])
# print("g2pw结果",initials,finals)
print("g2pw结果",initials,finals)
for c, v in zip(initials, finals):
raw_pinyin = c + v