Revert "add LLM translated CMUDICT-KATAKANA dictionary (#1660)"

This reverts commit 38cd8815781275a9b438d2c5812087c82f73a377.
This commit is contained in:
RVC-Boss 2024-10-02 21:52:50 +08:00 committed by GitHub
parent 38cd881578
commit c441fac22f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 134207 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -82,9 +82,8 @@ def post_replace_ph(ph):
ph = rep_map[ph]
# if ph in symbols:
# return ph
if ph not in symbols:
ph = "UNK"
# UNK may be useful as a pause token as it was trained in the model
# if ph not in symbols:
# ph = "UNK"
return ph
@@ -104,8 +103,6 @@ def symbols_to_japanese(text):
def preprocess_jap(text, with_prosody=False):
"""Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html"""
text = symbols_to_japanese(text)
# English words to lower case, should have no influence on japanese words.
text = text.lower()
sentences = re.split(_japanese_marks, text)
marks = re.findall(_japanese_marks, text)
text = []
@@ -222,6 +219,5 @@ def g2p(norm_text, with_prosody=True):
if __name__ == "__main__":
from text.symbols2 import symbols
phones = g2p("Hello.こんにちは今日もNiCe天気ですねtokyotowerに行きましょう")
phones = g2p("こんにちは, hello, AKITOです,よろしくお願いしますね!")
print(phones)