mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-09-29 00:30:15 +08:00
Merge 8c0cb0d691554d8311d8904972ae3efa8bfd1cc4 into 11aa78bd9bda8b53047cfcae03abf7ca94d27391
This commit is contained in:
commit
3d28749110
@ -10,14 +10,14 @@ import os
|
||||
from text import symbols as symbols_v1
|
||||
from text import symbols2 as symbols_v2
|
||||
|
||||
|
||||
special = [
|
||||
# ("%", "zh", "SP"),
|
||||
("¥", "zh", "SP2"),
|
||||
# ("¥", "zh", "SP2"), #加了货币计数所以人民币符不是SP2了
|
||||
("^", "zh", "SP3"),
|
||||
# ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
|
||||
]
|
||||
|
||||
|
||||
def clean_text(text, language, version=None):
|
||||
if version is None:
|
||||
version = os.environ.get("version", "v2")
|
||||
@ -31,6 +31,14 @@ def clean_text(text, language, version=None):
|
||||
if language not in language_module_map:
|
||||
language = "en"
|
||||
text = " "
|
||||
if language in ("zh"): #处理货币似乎最佳方案是这里截胡,不然可能被吞...
|
||||
from text.zh_normalization.num import (
|
||||
RE_CNY_PREFIX, RE_CNY_SUFFIX, replace_cny_prefix, replace_cny_suffix,
|
||||
RE_USD_SYMBOL, RE_USD_SUFFIX, replace_usd_symbol, replace_usd_suffix,)
|
||||
text = RE_CNY_PREFIX.sub(replace_cny_prefix, text)
|
||||
text = RE_CNY_SUFFIX.sub(replace_cny_suffix, text)
|
||||
text = RE_USD_SYMBOL.sub(replace_usd_symbol, text)
|
||||
text = RE_USD_SUFFIX.sub(replace_usd_suffix, text)
|
||||
for special_s, special_l, target_symbol in special:
|
||||
if special_s in text and language == special_l:
|
||||
return clean_special(text, language, special_s, target_symbol, version)
|
||||
|
@ -337,3 +337,116 @@ def num2str(value_string: str) -> str:
|
||||
result = result if result else "零"
|
||||
result += "点" + verbalize_digit(decimal)
|
||||
return result
|
||||
|
||||
# Renminbi amounts. In both patterns group 1 is the numeric amount: an
# optional minus sign, digits with optional thousands commas, and an optional
# decimal part.
# The yuan sign is matched in its half-width (U+00A5) and full-width (U+FFE5)
# forms; they are written as escapes because the two glyphs are easy to
# confuse (or to collapse into one) when the file is copied or normalized.
RE_CNY_PREFIX = re.compile(r"[\u00a5\uffe5]\s*(-?\d[\d,]*(?:\.\d+)?)")
RE_CNY_SUFFIX = re.compile(
    r"(-?\d[\d,]*(?:\.\d+)?)\s*(?:人民币|元|CNY|cny|[\u00a5\uffe5])"
)
|
||||
|
||||
def _strip_commas(s: str) -> str:
|
||||
return s.replace(",", "")
|
||||
|
||||
def _split_amount(amount: str):
    """Break a numeric amount string into sign, integer and fractional parts.

    Args:
        amount: Text such as "-1,234.5"; a leading "-" marks a negative
            amount and commas are treated as thousands separators.

    Returns:
        A tuple ``(neg, integer, frac, had_frac)`` where ``neg`` is True for
        a leading minus sign, ``integer`` is the digits before the decimal
        point ("0" when empty), ``frac`` is exactly two characters (the
        fractional digits right-padded with zeros or truncated), and
        ``had_frac`` tells whether the input contained a decimal point.
    """
    is_negative = amount.startswith("-")
    unsigned = amount[1:] if is_negative else amount
    unsigned = _strip_commas(unsigned) or "0"

    # partition() yields an empty separator when there is no decimal point.
    whole, dot, fraction = unsigned.partition(".")
    saw_point = dot == "."

    # Keep exactly two fractional digits: pad short input, drop the rest.
    two_digits = (fraction + "00")[:2]
    return is_negative, whole or "0", two_digits, saw_point
|
||||
|
||||
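# CNY and USD amounts are both converted from the text cleaner, so that the currency symbols are not swallowed before they can be read.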
|
||||
def replace_cny_amount(amount: str, num2str) -> str:
    """Spell out a renminbi amount in yuan / jiao / fen.

    Args:
        amount: Numeric text, optionally signed and comma-grouped.
        num2str: Callable that converts a digit string to its spoken form.

    Returns:
        The yuan part (always present, "零元" for a zero integer part),
        followed by the non-zero jiao and fen digits, or by "整" when the
        input had a decimal point but both fractional digits are zero.
        Negative amounts are prefixed with "负".
    """
    neg, integer, frac, had_frac = _split_amount(amount)

    yuan_cn = "零" if integer == "0" else num2str(integer)
    jiao, fen = frac[0], frac[1]

    # The yuan part is emitted unconditionally, even for a zero integer part.
    pieces = [yuan_cn + "元"]

    if jiao == "0" and fen == "0":
        if had_frac:
            pieces.append("整")
    else:
        if jiao != "0":
            pieces.append(num2str(jiao) + "角")
        if fen != "0":
            pieces.append(num2str(fen) + "分")

    res = "".join(pieces)
    if neg and res and res[0] != "负":
        res = "负" + res
    return res
|
||||
|
||||
def replace_cny_prefix(m, num2str=num2str):
    """Substitution callback for ``RE_CNY_PREFIX``: read group 1 as yuan."""
    amount = m.group(1)
    return replace_cny_amount(amount, num2str)
|
||||
|
||||
def replace_cny_suffix(m, num2str=num2str):
    """Substitution callback for ``RE_CNY_SUFFIX``: read group 1 as yuan."""
    amount = m.group(1)
    return replace_cny_amount(amount, num2str)
|
||||
|
||||
# The dollar sign can also denote other dollars (Canadian, etc.), but it is always read as US dollars here.
|
||||
# US-dollar amounts. In both patterns group 1 is the numeric amount: an
# optional minus sign, digits with optional thousands commas, and an optional
# decimal part.
# The dollar sign is matched inside a character class, where "$" is a literal
# rather than the end-of-string anchor; an unescaped "$" alternative would
# make the suffix pattern treat any number at the end of the text as dollars.
# \uff04 is the full-width dollar sign.
RE_USD_SYMBOL = re.compile(r"[$\uff04]\s*(-?\d[\d,]*(?:\.\d+)?)")
RE_USD_SUFFIX = re.compile(
    r"(-?\d[\d,]*(?:\.\d+)?)\s*(?:美元|USD|usd|[$\uff04])"
)
|
||||
|
||||
def _strip_commas(s: str) -> str:
|
||||
return s.replace(",", "")
|
||||
|
||||
# NOTE(review): this repeats the `_split_amount` defined earlier in this
# module; being the later definition, it is the one bound at import time.
def _split_amount(amount: str):
    """Break a numeric amount string into sign, integer and fractional parts.

    Args:
        amount: Text such as "-1,234.5"; a leading "-" marks a negative
            amount and commas are treated as thousands separators.

    Returns:
        A tuple ``(neg, integer, frac, had_frac)`` where ``neg`` is True for
        a leading minus sign, ``integer`` is the digits before the decimal
        point ("0" when empty), ``frac`` is exactly two characters (the
        fractional digits right-padded with zeros or truncated), and
        ``had_frac`` tells whether the input contained a decimal point.
    """
    is_negative = amount.startswith("-")
    unsigned = amount[1:] if is_negative else amount
    unsigned = _strip_commas(unsigned) or "0"

    # partition() yields an empty separator when there is no decimal point.
    whole, dot, fraction = unsigned.partition(".")
    saw_point = dot == "."

    # Only two fractional digits are kept; they are read out as cents.
    two_digits = (fraction + "00")[:2]
    return is_negative, whole or "0", two_digits, saw_point
|
||||
|
||||
def replace_usd_amount(amount: str, num2str) -> str:
    """Spell out a US-dollar amount in dollars and cents.

    Args:
        amount: Numeric text, optionally signed and comma-grouped.
        num2str: Callable that converts a digit string to its spoken form.

    Returns:
        The dollar part (omitted when the integer part is zero and there are
        cents), followed by the cents read as a two-digit number plus "美分".
        When there are no cents, a zero amount reads "零美元", and "整" is
        appended if the input had a decimal point. Negative amounts are
        prefixed with "负".
    """
    neg, integer, frac, had_frac = _split_amount(amount)

    # frac always holds exactly two digits: tens and ones of the cents.
    tens, ones = frac[0], frac[1]
    parts = []
    if integer != "0":
        parts.append(num2str(integer) + "美元")

    if tens != "0" or ones != "0":
        cents = ""
        if tens != "0":
            cents += num2str(tens) + "十"
        if ones != "0":
            cents += num2str(ones)
        # 10-19 cents are read "十…" rather than "一十…".
        parts.append(cents.replace("一十", "十") + "美分")
    else:
        # A zero amount still needs a unit. Checking this before the "整"
        # suffix keeps "0.00" from collapsing to a bare "整".
        if not parts:
            parts.append("零美元")
        if had_frac:
            parts.append("整")

    res = "".join(parts)
    if neg and not res.startswith("负"):
        res = "负" + res
    return res
|
||||
|
||||
def replace_usd_symbol(m, num2str=num2str):
    """Substitution callback for ``RE_USD_SYMBOL``: read group 1 as dollars."""
    amount = m.group(1)
    return replace_usd_amount(amount, num2str)
|
||||
|
||||
def replace_usd_suffix(m, num2str=num2str):
    """Substitution callback for ``RE_USD_SUFFIX``: read group 1 as dollars."""
    amount = m.group(1)
    return replace_usd_amount(amount, num2str)
|
||||
|
Loading…
x
Reference in New Issue
Block a user