From dac35d949528620891204a7c837ebfd132bf8ed5 Mon Sep 17 00:00:00 2001
From: KamioRinn <snowsdream@live.com>
Date: Sun, 4 Aug 2024 04:40:38 +0800
Subject: [PATCH] Fix webui clean_text call; move UNK symbol fallback into cleaner

---
 GPT_SoVITS/inference_webui.py |  2 +-
 GPT_SoVITS/text/cantonese.py  |  2 +-
 GPT_SoVITS/text/chinese.py    |  2 +-
 GPT_SoVITS/text/chinese2.py   |  2 +-
 GPT_SoVITS/text/cleaner.py    |  6 +++---
 GPT_SoVITS/text/english.py    |  5 +----
 GPT_SoVITS/text/japanese.py   | 28 +++++++---------------------
 GPT_SoVITS/text/korean.py     |  7 ++-----
 8 files changed, 17 insertions(+), 37 deletions(-)
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index aee3f6aa..999bc78a 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -228,7 +228,7 @@ dict_language = {
 
 
 def clean_text_inf(text, language, version):
-    phones, word2ph, norm_text = clean_text(text, language)
+    phones, word2ph, norm_text = clean_text(text, language, version)
     phones = cleaned_text_to_sequence(phones, version)
     return phones, word2ph, norm_text
 
diff --git a/GPT_SoVITS/text/cantonese.py b/GPT_SoVITS/text/cantonese.py
index e3325344..b31dbd58 100644
--- a/GPT_SoVITS/text/cantonese.py
+++ b/GPT_SoVITS/text/cantonese.py
@@ -187,7 +187,7 @@ def get_bert_feature(text, word2ph):
     return chinese_bert.get_bert_feature(text, word2ph)
 
 
-def g2p(text, version=""):
+def g2p(text):
     # word2ph = []
     jyuping = get_jyutping(text)
     # print(jyuping)
diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py
index fb484416..bebf3f03 100644
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@@ -61,7 +61,7 @@ def replace_consecutive_punctuation(text):
     return result
 
 
-def g2p(text, version=""):
+def g2p(text):
     pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
     sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
     phones, word2ph = _g2p(sentences)
diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py
index 58dd3654..a12e360e 100644
--- a/GPT_SoVITS/text/chinese2.py
+++ b/GPT_SoVITS/text/chinese2.py
@@ -60,7 +60,7 @@ def replace_punctuation(text):
     return replaced_text
 
 
-def g2p(text, version=""):
+def g2p(text):
     pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
     sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
     phones, word2ph = _g2p(sentences)
diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py
index b85e040b..2dcd4934 100644
--- a/GPT_SoVITS/text/cleaner.py
+++ b/GPT_SoVITS/text/cleaner.py
@@ -49,11 +49,11 @@ def clean_text(text, language, version):
             phones = [','] * (4 - len(phones)) + phones
         word2ph = None
     else:
-        phones = language_module.g2p(norm_text, version)
+        phones = language_module.g2p(norm_text)
         word2ph = None
 
     for ph in phones:
-        assert ph in symbols
+        phones = ['UNK' if ph not in symbols else ph for ph in phones]
     return phones, word2ph, norm_text
 
 
@@ -71,7 +71,7 @@ def clean_special(text, language, special_s, target_symbol, version):
     text = text.replace(special_s, ",")
     language_module = language_module_map[language]
     norm_text = language_module.text_normalize(text)
-    phones = language_module.g2p(norm_text, version)
+    phones = language_module.g2p(norm_text)
     new_ph = []
     for ph in phones[0]:
         assert ph in symbols
diff --git a/GPT_SoVITS/text/english.py b/GPT_SoVITS/text/english.py
index ecca386d..ceee52b4 100644
--- a/GPT_SoVITS/text/english.py
+++ b/GPT_SoVITS/text/english.py
@@ -6,9 +6,6 @@ from g2p_en import G2p
 
 from text.symbols import punctuation
 
-# if os.environ.get("version","v1")=="v1":
-#     from text.symbols import symbols
-# else:
 from text.symbols2 import symbols
 
 import unicodedata
@@ -361,7 +358,7 @@ class en_G2p(G2p):
 _g2p = en_G2p()
 
 
-def g2p(text, version=""):
+def g2p(text):
     # g2p_en 整段推理，剔除不存在的arpa返回
     phone_list = _g2p(text)
     phones = [ph if ph != "<unk>" else "UNK" for ph in phone_list if ph not in [" ", "<pad>", "UW", "</s>", "<s>"]]
diff --git a/GPT_SoVITS/text/japanese.py b/GPT_SoVITS/text/japanese.py
index ecb0fbe9..4c10720e 100644
--- a/GPT_SoVITS/text/japanese.py
+++ b/GPT_SoVITS/text/japanese.py
@@ -4,15 +4,6 @@ import sys
 
 import pyopenjtalk
 
-
-import os
-# if os.environ.get("version","v1")=="v1":
-#     from text.symbols import symbols
-# else:
-    # from text.symbols2 import symbols
-from text import symbols as symbols_v1
-from text import symbols2 as symbols_v2
-
 from text.symbols import punctuation
 # Regular expression matching Japanese without punctuation marks:
 _japanese_characters = re.compile(
@@ -51,7 +42,7 @@ _real_hatsuon = [
 ]
 
 
-def post_replace_ph(ph, version):
+def post_replace_ph(ph):
     rep_map = {
         "：": ",",
         "；": ",",
@@ -65,17 +56,12 @@ def post_replace_ph(ph, version):
         "...": "…",
     }
 
-    if version == "v1":
-        symbols = symbols_v1.symbols
-    else:
-        symbols = symbols_v2.symbols
-
     if ph in rep_map.keys():
         ph = rep_map[ph]
-    if ph in symbols:
-        return ph
-    if ph not in symbols:
-        ph = "UNK"
+    # if ph in symbols:
+    #     return ph
+    # if ph not in symbols:
+    #     ph = "UNK"
     return ph
 
 
@@ -203,9 +189,9 @@ def _numeric_feature_by_regex(regex, s):
         return -50
     return int(match.group(1))
 
-def g2p(norm_text, version, with_prosody=True):
+def g2p(norm_text, with_prosody=True):
     phones = preprocess_jap(norm_text, with_prosody)
-    phones = [post_replace_ph(i,version) for i in phones]
+    phones = [post_replace_ph(i) for i in phones]
     # todo: implement tones and word2ph
     return phones
 
diff --git a/GPT_SoVITS/text/korean.py b/GPT_SoVITS/text/korean.py
index 856de96d..23dea59a 100644
--- a/GPT_SoVITS/text/korean.py
+++ b/GPT_SoVITS/text/korean.py
@@ -2,10 +2,7 @@ import re
 from jamo import h2j, j2hcj
 import ko_pron
 from g2pk2 import G2p
-import os
-# if os.environ.get("version","v1")=="v1":
-#     from text.symbols import symbols
-# else:
+
 from text.symbols2 import symbols 
 
 # This is a list of Korean classifiers preceded by pure Korean numerals.
@@ -255,7 +252,7 @@ def post_replace_ph(ph):
         ph = "停"
     return ph
 
-def g2p(text, version=""):
+def g2p(text):
     text = latin_to_hangul(text)
     text = _g2p(text)
     text = divide_hangul(text)