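Fix webui and symbols: pass `version` to `clean_text`, and remove the `version` parameter from the per-language `g2p()` functions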

This commit is contained in:
KamioRinn 2024-08-04 04:40:38 +08:00
parent be4b5e9abb
commit dac35d9495
8 changed files with 17 additions and 37 deletions

View File

@ -228,7 +228,7 @@ dict_language = {
def clean_text_inf(text, language, version):
phones, word2ph, norm_text = clean_text(text, language)
phones, word2ph, norm_text = clean_text(text, language, version)
phones = cleaned_text_to_sequence(phones, version)
return phones, word2ph, norm_text

View File

@ -187,7 +187,7 @@ def get_bert_feature(text, word2ph):
return chinese_bert.get_bert_feature(text, word2ph)
def g2p(text, version=""):
def g2p(text):
# word2ph = []
jyuping = get_jyutping(text)
# print(jyuping)

View File

@ -61,7 +61,7 @@ def replace_consecutive_punctuation(text):
return result
def g2p(text, version=""):
def g2p(text):
pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
phones, word2ph = _g2p(sentences)

View File

@ -60,7 +60,7 @@ def replace_punctuation(text):
return replaced_text
def g2p(text, version=""):
def g2p(text):
pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
phones, word2ph = _g2p(sentences)

View File

@ -49,11 +49,11 @@ def clean_text(text, language, version):
phones = [','] * (4 - len(phones)) + phones
word2ph = None
else:
phones = language_module.g2p(norm_text, version)
phones = language_module.g2p(norm_text)
word2ph = None
for ph in phones:
assert ph in symbols
phones = ['UNK' if ph not in symbols else ph for ph in phones]
return phones, word2ph, norm_text
@ -71,7 +71,7 @@ def clean_special(text, language, special_s, target_symbol, version):
text = text.replace(special_s, ",")
language_module = language_module_map[language]
norm_text = language_module.text_normalize(text)
phones = language_module.g2p(norm_text, version)
phones = language_module.g2p(norm_text)
new_ph = []
for ph in phones[0]:
assert ph in symbols

View File

@ -6,9 +6,6 @@ from g2p_en import G2p
from text.symbols import punctuation
# if os.environ.get("version","v1")=="v1":
# from text.symbols import symbols
# else:
from text.symbols2 import symbols
import unicodedata
@ -361,7 +358,7 @@ class en_G2p(G2p):
_g2p = en_G2p()
def g2p(text, version=""):
def g2p(text):
# g2p_en 整段推理剔除不存在的arpa返回
phone_list = _g2p(text)
phones = [ph if ph != "<unk>" else "UNK" for ph in phone_list if ph not in [" ", "<pad>", "UW", "</s>", "<s>"]]

View File

@ -4,15 +4,6 @@ import sys
import pyopenjtalk
import os
# if os.environ.get("version","v1")=="v1":
# from text.symbols import symbols
# else:
# from text.symbols2 import symbols
from text import symbols as symbols_v1
from text import symbols2 as symbols_v2
from text.symbols import punctuation
# Regular expression matching Japanese without punctuation marks:
_japanese_characters = re.compile(
@ -51,7 +42,7 @@ _real_hatsuon = [
]
def post_replace_ph(ph, version):
def post_replace_ph(ph):
rep_map = {
"：": ",",
"；": ",",
@ -65,17 +56,12 @@ def post_replace_ph(ph, version):
"...": "…",
}
if version == "v1":
symbols = symbols_v1.symbols
else:
symbols = symbols_v2.symbols
if ph in rep_map.keys():
ph = rep_map[ph]
if ph in symbols:
return ph
if ph not in symbols:
ph = "UNK"
# if ph in symbols:
# return ph
# if ph not in symbols:
# ph = "UNK"
return ph
@ -203,9 +189,9 @@ def _numeric_feature_by_regex(regex, s):
return -50
return int(match.group(1))
def g2p(norm_text, version, with_prosody=True):
def g2p(norm_text, with_prosody=True):
phones = preprocess_jap(norm_text, with_prosody)
phones = [post_replace_ph(i,version) for i in phones]
phones = [post_replace_ph(i) for i in phones]
# todo: implement tones and word2ph
return phones

View File

@ -2,10 +2,7 @@ import re
from jamo import h2j, j2hcj
import ko_pron
from g2pk2 import G2p
import os
# if os.environ.get("version","v1")=="v1":
# from text.symbols import symbols
# else:
from text.symbols2 import symbols
# This is a list of Korean classifiers preceded by pure Korean numerals.
@ -255,7 +252,7 @@ def post_replace_ph(ph):
ph = ""
return ph
def g2p(text, version=""):
def g2p(text):
text = latin_to_hangul(text)
text = _g2p(text)
text = divide_hangul(text)