fix webui and symbols

2026-01-09 03:46:58 +08:00 · 2024-08-04 04:40:38 +08:00 · 2024-08-04 04:40:38 +08:00 · dac35d9495
commit dac35d9495
parent be4b5e9abb
8 changed files with 17 additions and 37 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -228,7 +228,7 @@ dict_language = {


 def clean_text_inf(text, language, version):
-    phones, word2ph, norm_text = clean_text(text, language)
+    phones, word2ph, norm_text = clean_text(text, language, version)
    phones = cleaned_text_to_sequence(phones, version)
    return phones, word2ph, norm_text

--- a/GPT_SoVITS/text/cantonese.py
+++ b/GPT_SoVITS/text/cantonese.py
@@ -187,7 +187,7 @@ def get_bert_feature(text, word2ph):
    return chinese_bert.get_bert_feature(text, word2ph)


-def g2p(text, version=""):
+def g2p(text):
    # word2ph = []
    jyuping = get_jyutping(text)
    # print(jyuping)
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@@ -61,7 +61,7 @@ def replace_consecutive_punctuation(text):
    return result


-def g2p(text, version=""):
+def g2p(text):
    pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
    sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
    phones, word2ph = _g2p(sentences)
--- a/GPT_SoVITS/text/chinese2.py
+++ b/GPT_SoVITS/text/chinese2.py
@@ -60,7 +60,7 @@ def replace_punctuation(text):
    return replaced_text


-def g2p(text, version=""):
+def g2p(text):
    pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
    sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
    phones, word2ph = _g2p(sentences)
--- a/GPT_SoVITS/text/cleaner.py
+++ b/GPT_SoVITS/text/cleaner.py
@@ -49,11 +49,11 @@ def clean_text(text, language, version):
            phones = [','] * (4 - len(phones)) + phones
        word2ph = None
    else:
-        phones = language_module.g2p(norm_text, version)
+        phones = language_module.g2p(norm_text)
        word2ph = None

    for ph in phones:
-        assert ph in symbols
+        phones = ['UNK' if ph not in symbols else ph for ph in phones]
    return phones, word2ph, norm_text


@@ -71,7 +71,7 @@ def clean_special(text, language, special_s, target_symbol, version):
    text = text.replace(special_s, ",")
    language_module = language_module_map[language]
    norm_text = language_module.text_normalize(text)
-    phones = language_module.g2p(norm_text, version)
+    phones = language_module.g2p(norm_text)
    new_ph = []
    for ph in phones[0]:
        assert ph in symbols
--- a/GPT_SoVITS/text/english.py
+++ b/GPT_SoVITS/text/english.py
@@ -6,9 +6,6 @@ from g2p_en import G2p

 from text.symbols import punctuation

-# if os.environ.get("version","v1")=="v1":
-#     from text.symbols import symbols
-# else:
 from text.symbols2 import symbols

 import unicodedata
@@ -361,7 +358,7 @@ class en_G2p(G2p):
 _g2p = en_G2p()


-def g2p(text, version=""):
+def g2p(text):
    # g2p_en 整段推理，剔除不存在的arpa返回
    phone_list = _g2p(text)
    phones = [ph if ph != "<unk>" else "UNK" for ph in phone_list if ph not in [" ", "<pad>", "UW", "</s>", "<s>"]]
--- a/GPT_SoVITS/text/japanese.py
+++ b/GPT_SoVITS/text/japanese.py
@@ -4,15 +4,6 @@ import sys

 import pyopenjtalk

-
-import os
-# if os.environ.get("version","v1")=="v1":
-#     from text.symbols import symbols
-# else:
-    # from text.symbols2 import symbols
-from text import symbols as symbols_v1
-from text import symbols2 as symbols_v2
-
 from text.symbols import punctuation
 # Regular expression matching Japanese without punctuation marks:
 _japanese_characters = re.compile(
@@ -51,7 +42,7 @@ _real_hatsuon = [
 ]


-def post_replace_ph(ph, version):
+def post_replace_ph(ph):
    rep_map = {
        "：": ",",
        "；": ",",
@@ -65,17 +56,12 @@ def post_replace_ph(ph, version):
        "...": "…",
    }

-    if version == "v1":
-        symbols = symbols_v1.symbols
-    else:
-        symbols = symbols_v2.symbols
-
    if ph in rep_map.keys():
        ph = rep_map[ph]
-    if ph in symbols:
-        return ph
-    if ph not in symbols:
-        ph = "UNK"
+    # if ph in symbols:
+    #     return ph
+    # if ph not in symbols:
+    #     ph = "UNK"
    return ph


@@ -203,9 +189,9 @@ def _numeric_feature_by_regex(regex, s):
        return -50
    return int(match.group(1))

-def g2p(norm_text, version, with_prosody=True):
+def g2p(norm_text, with_prosody=True):
    phones = preprocess_jap(norm_text, with_prosody)
-    phones = [post_replace_ph(i,version) for i in phones]
+    phones = [post_replace_ph(i) for i in phones]
    # todo: implement tones and word2ph
    return phones

--- a/GPT_SoVITS/text/korean.py
+++ b/GPT_SoVITS/text/korean.py
@@ -2,10 +2,7 @@ import re
 from jamo import h2j, j2hcj
 import ko_pron
 from g2pk2 import G2p
-import os
-# if os.environ.get("version","v1")=="v1":
-#     from text.symbols import symbols
-# else:
+
 from text.symbols2 import symbols 

 # This is a list of Korean classifiers preceded by pure Korean numerals.
@@ -255,7 +252,7 @@ def post_replace_ph(ph):
        ph = "停"
    return ph

-def g2p(text, version=""):
+def g2p(text):
    text = latin_to_hangul(text)
    text = _g2p(text)
    text = divide_hangul(text)