Add LLM-translated CMUDICT-KATAKANA dictionary

This commit is contained in:
Akito-UzukiP 2024-09-28 21:40:45 -04:00
parent 78c68d46cb
commit 79944b41bc
2 changed files with 134207 additions and 4 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -82,8 +82,9 @@ def post_replace_ph(ph):
ph = rep_map[ph]
# if ph in symbols:
# return ph
# if ph not in symbols:
# ph = "UNK"
if ph not in symbols:
ph = "UNK"
# UNK may be useful as a pause token, since the model was trained with it
return ph
@@ -103,6 +104,8 @@ def symbols_to_japanese(text):
def preprocess_jap(text, with_prosody=False):
"""Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html"""
text = symbols_to_japanese(text)
# Convert English words to lower case; this should have no effect on Japanese text.
text = text.lower()
sentences = re.split(_japanese_marks, text)
marks = re.findall(_japanese_marks, text)
text = []
@@ -219,5 +222,6 @@ def g2p(norm_text, with_prosody=True):
if __name__ == "__main__":
phones = g2p("こんにちは, hello, AKITOです,よろしくお願いしますね!")
from text.symbols2 import symbols
phones = g2p("Hello.こんにちは今日もNiCe天気ですねtokyotowerに行きましょう")
print(phones)