Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git
modified: GPT_SoVITS/text/en_normalization/expend.py
modified: GPT_SoVITS/text/english.py
commit 604823b402 (parent d6222bc4d7)
GPT_SoVITS/text/en_normalization/expend.py

@@ -230,7 +230,7 @@ def _expand_number(m):
     return _inflect.number_to_words(num, andword='')
 
 
-def normalize_numbers(text):
+def normalize(text):
     """
     !!! All processing requires correct input !!!
     New rules can be added by simply adding a regular expression and a corresponding handler function
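The extension point described in the docstring pairs one compiled regular expression with one expansion function, in the same shape as `_expand_number` above. A minimal sketch of adding such a rule, using hypothetical names (`_percent_re`, `_expand_percent`, `expand_percent`) that do not exist in expend.py:

import re

import inflect

_inflect = inflect.engine()

# Hypothetical new rule: spell out percentages such as "85%".
_percent_re = re.compile(r'([0-9]+)%')

def _expand_percent(m):
    # Same pattern as _expand_number: turn the digits into words via inflect.
    return _inflect.number_to_words(m.group(1), andword='') + ' percent'

def expand_percent(text):
    # Regex plus handler, applied with re.sub like the module's other rules.
    return re.sub(_percent_re, _expand_percent, text)

print(expand_percent('battery at 85%'))  # battery at eighty-five percent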
@@ -263,10 +263,10 @@ def normalize_numbers(text):
 if __name__ == '__main__':
     # The segmentation result could actually be displayed (read-only, or editable without affecting the actual text passed to TTS)
     # and then confirmed by the user before being fed to TTS, letting users check for non-standard input
-    print(normalize_numbers("1. test ordinal number 1st"))
-    print(normalize_numbers("32.3$, $6.24, 1.1£, £7.14."))
-    print(normalize_numbers("3/23, 1/2, 3/2, 1/3, 6/1"))
-    print(normalize_numbers("1st, 22nd"))
-    print(normalize_numbers("a test 20h, 1.2s, 1L, 0.1km"))
-    print(normalize_numbers("a test of time 4:00, 13:00, 13:30"))
-    print(normalize_numbers("a test of temperature 4°F, 23°C, -19°C"))
+    print(normalize("1. test ordinal number 1st"))
+    print(normalize("32.3$, $6.24, 1.1£, £7.14."))
+    print(normalize("3/23, 1/2, 3/2, 1/3, 6/1"))
+    print(normalize("1st, 22nd"))
+    print(normalize("a test 20h, 1.2s, 1L, 0.1km"))
+    print(normalize("a test of time 4:00, 13:00, 13:30"))
+    print(normalize("a test of temperature 4°F, 23°C, -19°C"))
GPT_SoVITS/text/english.py

@@ -9,7 +9,7 @@ from text.symbols import punctuation
 from text.symbols2 import symbols
 
 from builtins import str as unicode
-from text.en_normalization.expend import normalize_numbers
+from text.en_normalization.expend import normalize
 from nltk.tokenize import TweetTokenizer
 word_tokenize = TweetTokenizer().tokenize
 from nltk import pos_tag
@@ -350,7 +350,7 @@ def text_normalize(text):
     text = pattern.sub(lambda x: rep_map[x.group()], text)
 
     text = unicode(text)
-    text = normalize_numbers(text)
+    text = normalize(text)
 
     # avoid reference leakage caused by repeated punctuation
     text = replace_consecutive_punctuation(text)
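`text_normalize` therefore runs punctuation remapping, then `normalize`, then the duplicate-punctuation collapse named in the comment. english.py's actual `replace_consecutive_punctuation` is not shown in this diff; a minimal regex-based sketch of what such a collapse could look like:

import re

def replace_consecutive_punctuation(text):
    # Collapse runs of the same punctuation mark ("!!!" -> "!") so repeated
    # punctuation cannot leak reference-audio artifacts into the TTS output.
    return re.sub(r'([,.!?;:])\1+', r'\1', text)

print(replace_consecutive_punctuation('wait... really!!!'))  # wait. really!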