From dac35d949528620891204a7c837ebfd132bf8ed5 Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Sun, 4 Aug 2024 04:40:38 +0800 Subject: [PATCH] fix webui and symbols --- GPT_SoVITS/inference_webui.py | 2 +- GPT_SoVITS/text/cantonese.py | 2 +- GPT_SoVITS/text/chinese.py | 2 +- GPT_SoVITS/text/chinese2.py | 2 +- GPT_SoVITS/text/cleaner.py | 6 +++--- GPT_SoVITS/text/english.py | 5 +---- GPT_SoVITS/text/japanese.py | 28 +++++++--------------------- GPT_SoVITS/text/korean.py | 7 ++----- 8 files changed, 17 insertions(+), 37 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index aee3f6aa..999bc78a 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -228,7 +228,7 @@ dict_language = { def clean_text_inf(text, language, version): - phones, word2ph, norm_text = clean_text(text, language) + phones, word2ph, norm_text = clean_text(text, language, version) phones = cleaned_text_to_sequence(phones, version) return phones, word2ph, norm_text diff --git a/GPT_SoVITS/text/cantonese.py b/GPT_SoVITS/text/cantonese.py index e3325344..b31dbd58 100644 --- a/GPT_SoVITS/text/cantonese.py +++ b/GPT_SoVITS/text/cantonese.py @@ -187,7 +187,7 @@ def get_bert_feature(text, word2ph): return chinese_bert.get_bert_feature(text, word2ph) -def g2p(text, version=""): +def g2p(text): # word2ph = [] jyuping = get_jyutping(text) # print(jyuping) diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py index fb484416..bebf3f03 100644 --- a/GPT_SoVITS/text/chinese.py +++ b/GPT_SoVITS/text/chinese.py @@ -61,7 +61,7 @@ def replace_consecutive_punctuation(text): return result -def g2p(text, version=""): +def g2p(text): pattern = r"(?<=[{0}])\s*".format("".join(punctuation)) sentences = [i for i in re.split(pattern, text) if i.strip() != ""] phones, word2ph = _g2p(sentences) diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py index 58dd3654..a12e360e 100644 --- a/GPT_SoVITS/text/chinese2.py +++ b/GPT_SoVITS/text/chinese2.py @@ -60,7 +60,7 @@ def replace_punctuation(text): return replaced_text -def g2p(text, version=""): +def g2p(text): pattern = r"(?<=[{0}])\s*".format("".join(punctuation)) sentences = [i for i in re.split(pattern, text) if i.strip() != ""] phones, word2ph = _g2p(sentences) diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py index b85e040b..2dcd4934 100644 --- a/GPT_SoVITS/text/cleaner.py +++ b/GPT_SoVITS/text/cleaner.py @@ -49,11 +49,11 @@ def clean_text(text, language, version): phones = [','] * (4 - len(phones)) + phones word2ph = None else: - phones = language_module.g2p(norm_text, version) + phones = language_module.g2p(norm_text) word2ph = None for ph in phones: - assert ph in symbols + phones = ['UNK' if ph not in symbols else ph for ph in phones] return phones, word2ph, norm_text @@ -71,7 +71,7 @@ def clean_special(text, language, special_s, target_symbol, version): text = text.replace(special_s, ",") language_module = language_module_map[language] norm_text = language_module.text_normalize(text) - phones = language_module.g2p(norm_text, version) + phones = language_module.g2p(norm_text) new_ph = [] for ph in phones[0]: assert ph in symbols diff --git a/GPT_SoVITS/text/english.py b/GPT_SoVITS/text/english.py index ecca386d..ceee52b4 100644 --- a/GPT_SoVITS/text/english.py +++ b/GPT_SoVITS/text/english.py @@ -6,9 +6,6 @@ from g2p_en import G2p from text.symbols import punctuation -# if os.environ.get("version","v1")=="v1": -# from text.symbols import symbols -# else: from text.symbols2 import symbols import unicodedata @@ -361,7 +358,7 @@ class en_G2p(G2p): _g2p = en_G2p() -def g2p(text, version=""): +def g2p(text): # g2p_en 整段推理,剔除不存在的arpa返回 phone_list = _g2p(text) phones = [ph if ph != "" else "UNK" for ph in phone_list if ph not in [" ", "", "UW", "", ""]] diff --git a/GPT_SoVITS/text/japanese.py b/GPT_SoVITS/text/japanese.py index ecb0fbe9..4c10720e 100644 --- a/GPT_SoVITS/text/japanese.py +++ b/GPT_SoVITS/text/japanese.py @@ -4,15 +4,6 @@ import sys import pyopenjtalk - -import os -# if os.environ.get("version","v1")=="v1": -# from text.symbols import symbols -# else: - # from text.symbols2 import symbols -from text import symbols as symbols_v1 -from text import symbols2 as symbols_v2 - from text.symbols import punctuation # Regular expression matching Japanese without punctuation marks: _japanese_characters = re.compile( @@ -51,7 +42,7 @@ _real_hatsuon = [ ] -def post_replace_ph(ph, version): +def post_replace_ph(ph): rep_map = { ":": ",", ";": ",", @@ -65,17 +56,12 @@ def post_replace_ph(ph, version): "...": "…", } - if version == "v1": - symbols = symbols_v1.symbols - else: - symbols = symbols_v2.symbols - if ph in rep_map.keys(): ph = rep_map[ph] - if ph in symbols: - return ph - if ph not in symbols: - ph = "UNK" + # if ph in symbols: + # return ph + # if ph not in symbols: + # ph = "UNK" return ph @@ -203,9 +189,9 @@ def _numeric_feature_by_regex(regex, s): return -50 return int(match.group(1)) -def g2p(norm_text, version, with_prosody=True): +def g2p(norm_text, with_prosody=True): phones = preprocess_jap(norm_text, with_prosody) - phones = [post_replace_ph(i,version) for i in phones] + phones = [post_replace_ph(i) for i in phones] # todo: implement tones and word2ph return phones diff --git a/GPT_SoVITS/text/korean.py b/GPT_SoVITS/text/korean.py index 856de96d..23dea59a 100644 --- a/GPT_SoVITS/text/korean.py +++ b/GPT_SoVITS/text/korean.py @@ -2,10 +2,7 @@ import re from jamo import h2j, j2hcj import ko_pron from g2pk2 import G2p -import os -# if os.environ.get("version","v1")=="v1": -# from text.symbols import symbols -# else: + from text.symbols2 import symbols # This is a list of Korean classifiers preceded by pure Korean numerals. @@ -255,7 +252,7 @@ def post_replace_ph(ph): ph = "停" return ph -def g2p(text, version=""): +def g2p(text): text = latin_to_hangul(text) text = _g2p(text) text = divide_hangul(text)