mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Normalize chinese arithmetic operations (#947)
This commit is contained in:
parent
4e43f6097f
commit
a3c4e040c8
@ -106,6 +106,29 @@ def replace_default_num(match):
|
||||
return verbalize_digit(number, alt_one=True)
|
||||
|
||||
|
||||
# Arithmetic expressions: add, subtract, multiply, divide and equals
# between two numeric operands.
# Group 1 is the left operand and group 9 the right operand; each is either
# an optionally signed integer/decimal ("3", "-2", "3.5") or a bare fraction
# written without a leading digit (".5"). Group 8 is the operator symbol.
RE_ASMD = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))([\+\-\×÷=])((-?)((\d+)(\.\d+)?)|(\.(\d+)))')
# Chinese word substituted for each operator symbol that RE_ASMD accepts.
asmd_map = {
    '+': '加',
    '-': '减',
    '×': '乘',
    '÷': '除',
    '=': '等于'
}
|
||||
|
||||
|
||||
def replace_asmd(match) -> str:
    """Replace the operator of one matched arithmetic expression with its Chinese word.

    Args:
        match (re.Match): a match whose group 1 is the left operand,
            group 8 the operator symbol and group 9 the right operand
            (the group layout of RE_ASMD).
    Returns:
        str: the left operand, the asmd_map entry for the operator, and the
            right operand, concatenated. The operands are passed through
            unchanged.
    """
    # Only the operator is rewritten here; the operands stay as digits.
    result = match.group(1) + asmd_map[match.group(8)] + match.group(9)
    return result
|
||||
|
||||
|
||||
# Numeric expressions
# Pure decimals: an optionally signed number with a fractional part ("-3.14"),
# or a bare fraction written without a leading digit (".5").
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
|
||||
@ -155,7 +178,13 @@ def replace_number(match) -> str:
|
||||
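# The sub-patterns captured as match.group(1) and match.group(6) are copied from RE_NUMBER (the second operand was match.group(8) in the earlier single-line pattern)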
|
||||
|
||||
RE_RANGE = re.compile(
|
||||
r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))')
|
||||
r"""
|
||||
(?<![\d\+\-\×÷=]) # 使用反向前瞻以确保数字范围之前没有其他数字和操作符
|
||||
((-?)((\d+)(\.\d+)?)) # 匹配范围起始的负数或正数(整数或小数)
|
||||
[-~] # 匹配范围分隔符
|
||||
((-?)((\d+)(\.\d+)?)) # 匹配范围结束的负数或正数(整数或小数)
|
||||
(?![\d\+\-\×÷=]) # 使用正向前瞻以确保数字范围之后没有其他数字和操作符
|
||||
""", re.VERBOSE)
|
||||
|
||||
|
||||
def replace_range(match) -> str:
|
||||
@ -165,7 +194,7 @@ def replace_range(match) -> str:
|
||||
Returns:
|
||||
str
|
||||
"""
|
||||
first, second = match.group(1), match.group(8)
|
||||
first, second = match.group(1), match.group(6)
|
||||
first = RE_NUMBER.sub(replace_number, first)
|
||||
second = RE_NUMBER.sub(replace_number, second)
|
||||
result = f"{first}到{second}"
|
||||
|
@ -34,6 +34,7 @@ from .num import RE_PERCENTAGE
|
||||
from .num import RE_POSITIVE_QUANTIFIERS
|
||||
from .num import RE_RANGE
|
||||
from .num import RE_TO_RANGE
|
||||
from .num import RE_ASMD
|
||||
from .num import replace_default_num
|
||||
from .num import replace_frac
|
||||
from .num import replace_negative_num
|
||||
@ -42,6 +43,7 @@ from .num import replace_percentage
|
||||
from .num import replace_positive_quantifier
|
||||
from .num import replace_range
|
||||
from .num import replace_to_range
|
||||
from .num import replace_asmd
|
||||
from .phonecode import RE_MOBILE_PHONE
|
||||
from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
|
||||
from .phonecode import RE_TELEPHONE
|
||||
@ -67,7 +69,7 @@ class TextNormalizer():
|
||||
if lang == "zh":
|
||||
text = text.replace(" ", "")
|
||||
# 过滤掉特殊字符
|
||||
text = re.sub(r'[——《》【】<=>{}()()#&@“”^_|\\]', '', text)
|
||||
text = re.sub(r'[——《》【】<>{}()()#&@“”^_|\\]', '', text)
|
||||
text = self.SENTENCE_SPLITOR.sub(r'\1\n', text)
|
||||
text = text.strip()
|
||||
sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]
|
||||
@ -142,6 +144,11 @@ class TextNormalizer():
|
||||
sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)
|
||||
|
||||
sentence = RE_RANGE.sub(replace_range, sentence)
|
||||
|
||||
# 处理加减乘除
|
||||
while RE_ASMD.search(sentence):
|
||||
sentence = RE_ASMD.sub(replace_asmd, sentence)
|
||||
|
||||
sentence = RE_INTEGER.sub(replace_negative_num, sentence)
|
||||
sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
|
||||
sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier,
|
||||
|
Loading…
x
Reference in New Issue
Block a user