mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-08 16:00:01 +08:00
Add more polyphonic and merge_erhua
This commit is contained in:
parent
c31126f674
commit
401bf3a04b
@ -86,6 +86,54 @@ def _get_initials_finals(word):
|
||||
return initials, finals
|
||||
|
||||
|
||||
must_erhua = {
|
||||
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
|
||||
}
|
||||
not_erhua = {
|
||||
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
|
||||
"拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
|
||||
"流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
|
||||
"孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
|
||||
"狗儿", "少儿"
|
||||
}
|
||||
def _merge_erhua(initials: list[str],
|
||||
finals: list[str],
|
||||
word: str,
|
||||
pos: str) -> list[list[str]]:
|
||||
"""
|
||||
Do erhub.
|
||||
"""
|
||||
# fix er1
|
||||
for i, phn in enumerate(finals):
|
||||
if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1':
|
||||
finals[i] = 'er2'
|
||||
|
||||
# 发音
|
||||
if word not in must_erhua and (word in not_erhua or
|
||||
pos in {"a", "j", "nr"}):
|
||||
return initials, finals
|
||||
|
||||
# "……" 等情况直接返回
|
||||
if len(finals) != len(word):
|
||||
return initials, finals
|
||||
|
||||
assert len(finals) == len(word)
|
||||
|
||||
# 与前一个字发同音
|
||||
new_initials = []
|
||||
new_finals = []
|
||||
for i, phn in enumerate(finals):
|
||||
if i == len(finals) - 1 and word[i] == "儿" and phn in {
|
||||
"er2", "er5"
|
||||
} and word[-2:] not in not_erhua and new_finals:
|
||||
phn = "er" + new_finals[-1][-1]
|
||||
|
||||
new_initials.append(initials[i])
|
||||
new_finals.append(phn)
|
||||
|
||||
return new_initials, new_finals
|
||||
|
||||
|
||||
def _g2p(segments):
|
||||
phones_list = []
|
||||
word2ph = []
|
||||
@ -104,6 +152,8 @@ def _g2p(segments):
|
||||
continue
|
||||
sub_initials, sub_finals = _get_initials_finals(word)
|
||||
sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
|
||||
# 儿化
|
||||
sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
|
||||
initials.append(sub_initials)
|
||||
finals.append(sub_finals)
|
||||
# assert len(sub_initials) == len(sub_finals) == len(word)
|
||||
@ -139,6 +189,8 @@ def _g2p(segments):
|
||||
|
||||
pre_word_length = now_word_length
|
||||
sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
|
||||
# 儿化
|
||||
sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
|
||||
initials.append(sub_initials)
|
||||
finals.append(sub_finals)
|
||||
|
||||
|
@ -13,6 +13,7 @@ from .onnx_api import G2PWOnnxConverter
|
||||
current_file_path = os.path.dirname(__file__)
|
||||
CACHE_PATH = os.path.join(current_file_path, "polyphonic.pickle")
|
||||
PP_DICT_PATH = os.path.join(current_file_path, "polyphonic.rep")
|
||||
PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep")
|
||||
|
||||
|
||||
class G2PWPinyin(Pinyin):
|
||||
@ -133,6 +134,13 @@ def read_dict():
|
||||
value = eval(value_str.strip())
|
||||
polyphonic_dict[key.strip()] = value
|
||||
line = f.readline()
|
||||
with open(PP_FIX_DICT_PATH) as f:
|
||||
line = f.readline()
|
||||
while line:
|
||||
key, value_str = line.split(':')
|
||||
value = eval(value_str.strip())
|
||||
polyphonic_dict[key.strip()] = value
|
||||
line = f.readline()
|
||||
return polyphonic_dict
|
||||
|
||||
|
||||
|
45024
GPT_SoVITS/text/g2pw/polyphonic-fix.rep
Normal file
45024
GPT_SoVITS/text/g2pw/polyphonic-fix.rep
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -51,25 +51,3 @@
|
||||
郭晟: ['guo1', 'sheng4']
|
||||
中标: ['zhong4', 'biao1']
|
||||
抗住: ['kang2', 'zhu4']
|
||||
果脯: ['guo3', 'fu3']
|
||||
肉脯: ['rou4', 'fu3']
|
||||
不粘锅: ['bu4', 'zhan1', 'guo1']
|
||||
粘信封: ['zhan1', 'xin4', 'feng1']
|
||||
粘牙: ['zhan1', 'ya2']
|
||||
粘皮带骨: ['zhan1', 'pi2', 'dai4', 'gu3']
|
||||
粘贴: ['zhan1', 'tie1']
|
||||
粘连: ['zhan1', 'lian2']
|
||||
一扎: ['yī', 'zā']
|
||||
包扎: ['bāo', 'zā']
|
||||
安营扎寨: ['ān', 'yíng', 'zhā', 'zhài']
|
||||
屯扎: ['tún', 'zhā']
|
||||
巴尔扎克: ['bā', 'ěr', 'zhā', 'kè']
|
||||
扎染: ['zā', 'rǎn']
|
||||
扎根: ['zhā', 'gēn']
|
||||
扎根串连: ['zhā', 'gēn', 'chuàn', 'lián']
|
||||
扎破: ['zhá', 'pò']
|
||||
扎营: ['zhā', 'yíng']
|
||||
结扎: ['jie2', 'za1']
|
||||
绑扎: ['bang3', 'za1']
|
||||
长进: ['zhǎng', 'jìn']
|
||||
折本: ['shé', 'běn']
|
Loading…
x
Reference in New Issue
Block a user