mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
56 lines
1.7 KiB
Python
56 lines
1.7 KiB
Python
from text import chinese, japanese, cleaned_text_to_sequence, symbols, english
|
|
|
|
# Language code -> text front-end module. Each module is used below through
# its ``text_normalize`` and ``g2p`` functions.
language_module_map = {
    "zh": chinese,
    "ja": japanese,
    "en": english,
}
|
|
# (marker character, language code, replacement symbol) triples. When the
# marker occurs in text of the given language, clean_text hands the text to
# clean_special, which swaps the resulting pause phoneme for the symbol.
special = [
    ("%", "zh", "SP"),
    ("¥", "zh", "SP2"),
    ("^", "zh", "SP3"),
    # ("@", "zh", "SP4") is intentionally disabled: the meme-remix ("guichu")
    # effect was dropped to stay consistent with the second version.
]
|
|
|
|
|
|
def clean_text(text, language):
    """Normalize ``text`` and convert it to phonemes for ``language``.

    If ``text`` contains one of the markers registered in ``special`` for this
    language, the work is delegated to ``clean_special`` and its result is
    returned unchanged.

    Otherwise returns ``(phones, word2ph, norm_text)``:
      * ``phones``    - phoneme symbols produced by the language module's g2p;
      * ``word2ph``   - for "zh", the per-character phoneme counts returned by
                        g2p; ``None`` for every other language;
      * ``norm_text`` - the text after the language module's normalization.

    Raises ``KeyError`` for a language missing from ``language_module_map`` and
    ``AssertionError`` when a consistency check on the g2p output fails.
    """
    # The first matching special marker wins and short-circuits the normal path.
    for marker, marker_lang, sp_symbol in special:
        if not (marker in text and language == marker_lang):
            continue
        return clean_special(text, language, marker, sp_symbol)

    module = language_module_map[language]
    norm_text = module.text_normalize(text)

    if language != "zh":
        # Non-Chinese front ends return only the phoneme list.
        phones, word2ph = module.g2p(norm_text), None
    else:
        phones, word2ph = module.g2p(norm_text)
        # Sanity checks: the counts must add up to the phoneme total, and
        # there must be exactly one count per normalized character.
        assert len(phones) == sum(word2ph)
        assert len(norm_text) == len(word2ph)

    # Every phoneme has to belong to the known symbol inventory.
    for phone in phones:
        assert phone in symbols

    return phones, word2ph, norm_text
|
|
|
|
|
|
def clean_special(text, language, special_s, target_symbol):
    """Clean text that contains a special silence-segment (SP) marker.

    Every occurrence of ``special_s`` in ``text`` is replaced with "," before
    normalization and g2p; afterwards each "," phoneme is replaced with
    ``target_symbol``.

    Args:
        text: Raw input text containing the marker.
        language: Key into ``language_module_map``.
        special_s: Marker string to replace.
        target_symbol: Phoneme symbol substituted for each "," phoneme.

    Returns:
        ``(phones, word2ph, norm_text)``, the same shape ``clean_text`` returns
        on its normal path. ``word2ph`` is ``None`` unless ``language`` is
        "zh". The substitution is one-for-one, so ``word2ph`` stays valid.

    Raises:
        KeyError: ``language`` is not in ``language_module_map``.
        AssertionError: a phoneme is not part of ``symbols``.
    """
    text = text.replace(special_s, ",")
    language_module = language_module_map[language]
    norm_text = language_module.text_normalize(text)
    g2p_result = language_module.g2p(norm_text)

    # As in clean_text, the "zh" g2p yields a (phones, word2ph) pair while the
    # other languages yield the phoneme list alone. Iterating the pair
    # directly would walk over two lists instead of the phonemes.
    if language == "zh":
        phones, word2ph = g2p_result
    else:
        phones, word2ph = g2p_result, None

    new_ph = []
    for ph in phones:
        assert ph in symbols
        new_ph.append(target_symbol if ph == "," else ph)
    return new_ph, word2ph, norm_text
|
|
|
|
|
|
def text_to_sequence(text, language):
    """Clean ``text`` for ``language`` and encode its phonemes.

    Args:
        text: Raw input text.
        language: Language code forwarded to ``clean_text``.

    Returns:
        The result of ``cleaned_text_to_sequence`` applied to the phonemes
        extracted from ``text``.
    """
    # clean_text requires the language and returns
    # (phones, word2ph, norm_text); only the phonemes are encoded.
    phones = clean_text(text, language)[0]
    return cleaned_text_to_sequence(phones)
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: a Chinese sentence containing the "%" silence marker.
    demo_output = clean_text("你好%啊啊啊额、还是到付红四方。", "zh")
    print(demo_output)
|