modified: GPT_SoVITS/text/en_normalization/expend.py

modified:   GPT_SoVITS/text/english.py
This commit is contained in:
Cosmo Clara 2024-11-20 14:12:10 +00:00
parent d6222bc4d7
commit 604823b402
2 changed files with 10 additions and 10 deletions

View File

@@ -230,7 +230,7 @@ def _expand_number(m):
return _inflect.number_to_words(num, andword='')
def normalize_numbers(text):
def normalize(text):
"""
!!! 所有的处理都需要正确的输入 !!!
可以添加新的处理只需要添加正则表达式和对应的处理函数即可
@ -263,10 +263,10 @@ def normalize_numbers(text):
if __name__ == '__main__':
# 我觉得其实可以把切分结果展示出来只读或者修改不影响传给TTS的实际text
# 然后让用户确认后再输入给 TTS可以让用户检查自己有没有不标准的输入
print(normalize_numbers("1. test ordinal number 1st"))
print(normalize_numbers("32.3$, $6.24, 1.1£, £7.14."))
print(normalize_numbers("3/23, 1/2, 3/2, 1/3, 6/1"))
print(normalize_numbers("1st, 22nd"))
print(normalize_numbers("a test 20h, 1.2s, 1L, 0.1km"))
print(normalize_numbers("a test of time 4:00, 13:00, 13:30"))
print(normalize_numbers("a test of temperature 4°F, 23°C, -19°C"))
print(normalize("1. test ordinal number 1st"))
print(normalize("32.3$, $6.24, 1.1£, £7.14."))
print(normalize("3/23, 1/2, 3/2, 1/3, 6/1"))
print(normalize("1st, 22nd"))
print(normalize("a test 20h, 1.2s, 1L, 0.1km"))
print(normalize("a test of time 4:00, 13:00, 13:30"))
print(normalize("a test of temperature 4°F, 23°C, -19°C"))

View File

@@ -9,7 +9,7 @@ from text.symbols import punctuation
from text.symbols2 import symbols
from builtins import str as unicode
from text.en_normalization.expend import normalize_numbers
from text.en_normalization.expend import normalize
from nltk.tokenize import TweetTokenizer
word_tokenize = TweetTokenizer().tokenize
from nltk import pos_tag
@@ -350,7 +350,7 @@ def text_normalize(text):
text = pattern.sub(lambda x: rep_map[x.group()], text)
text = unicode(text)
text = normalize_numbers(text)
text = normalize(text)
# 避免重复标点引起的参考泄露
text = replace_consecutive_punctuation(text)