modified: GPT_SoVITS/text/en_normalization/expend.py

modified:   GPT_SoVITS/text/english.py
This commit is contained in:
Cosmo Clara 2024-11-20 14:12:10 +00:00
parent d6222bc4d7
commit 604823b402
2 changed files with 10 additions and 10 deletions

View File

@@ -230,7 +230,7 @@ def _expand_number(m):
return _inflect.number_to_words(num, andword='')
def normalize_numbers(text):
def normalize(text):
"""
!!! 所有的处理都需要正确的输入 !!!
可以添加新的处理只需要添加正则表达式和对应的处理函数即可
@ -263,10 +263,10 @@ def normalize_numbers(text):
if __name__ == '__main__':
# 我觉得其实可以把切分结果展示出来只读或者修改不影响传给TTS的实际text
# 然后让用户确认后再输入给 TTS可以让用户检查自己有没有不标准的输入
print(normalize_numbers("1. test ordinal number 1st"))
print(normalize_numbers("32.3$, $6.24, 1.1£, £7.14."))
print(normalize_numbers("3/23, 1/2, 3/2, 1/3, 6/1"))
print(normalize_numbers("1st, 22nd"))
print(normalize_numbers("a test 20h, 1.2s, 1L, 0.1km"))
print(normalize_numbers("a test of time 4:00, 13:00, 13:30"))
print(normalize_numbers("a test of temperature 4°F, 23°C, -19°C"))
print(normalize("1. test ordinal number 1st"))
print(normalize("32.3$, $6.24, 1.1£, £7.14."))
print(normalize("3/23, 1/2, 3/2, 1/3, 6/1"))
print(normalize("1st, 22nd"))
print(normalize("a test 20h, 1.2s, 1L, 0.1km"))
print(normalize("a test of time 4:00, 13:00, 13:30"))
print(normalize("a test of temperature 4°F, 23°C, -19°C"))

View File

@@ -9,7 +9,7 @@ from text.symbols import punctuation
from text.symbols2 import symbols
from builtins import str as unicode
from text.en_normalization.expend import normalize_numbers
from text.en_normalization.expend import normalize
from nltk.tokenize import TweetTokenizer
word_tokenize = TweetTokenizer().tokenize
from nltk import pos_tag
@@ -350,7 +350,7 @@ def text_normalize(text):
text = pattern.sub(lambda x: rep_map[x.group()], text)
text = unicode(text)
text = normalize_numbers(text)
text = normalize(text)
# 避免重复标点引起的参考泄露
text = replace_consecutive_punctuation(text)