mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-09-29 00:30:15 +08:00
Merge 8c0cb0d691554d8311d8904972ae3efa8bfd1cc4 into 11aa78bd9bda8b53047cfcae03abf7ca94d27391
This commit is contained in:
commit
3d28749110
@ -10,14 +10,14 @@ import os
|
|||||||
from text import symbols as symbols_v1
|
from text import symbols as symbols_v1
|
||||||
from text import symbols2 as symbols_v2
|
from text import symbols2 as symbols_v2
|
||||||
|
|
||||||
|
|
||||||
special = [
|
special = [
|
||||||
# ("%", "zh", "SP"),
|
# ("%", "zh", "SP"),
|
||||||
("¥", "zh", "SP2"),
|
# ("¥", "zh", "SP2"), #加了货币计数所以人民币符不是SP2了
|
||||||
("^", "zh", "SP3"),
|
("^", "zh", "SP3"),
|
||||||
# ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
|
# ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def clean_text(text, language, version=None):
|
def clean_text(text, language, version=None):
|
||||||
if version is None:
|
if version is None:
|
||||||
version = os.environ.get("version", "v2")
|
version = os.environ.get("version", "v2")
|
||||||
@ -31,6 +31,14 @@ def clean_text(text, language, version=None):
|
|||||||
if language not in language_module_map:
|
if language not in language_module_map:
|
||||||
language = "en"
|
language = "en"
|
||||||
text = " "
|
text = " "
|
||||||
|
if language in ("zh"): #处理货币似乎最佳方案是这里截胡,不然可能被吞...
|
||||||
|
from text.zh_normalization.num import (
|
||||||
|
RE_CNY_PREFIX, RE_CNY_SUFFIX, replace_cny_prefix, replace_cny_suffix,
|
||||||
|
RE_USD_SYMBOL, RE_USD_SUFFIX, replace_usd_symbol, replace_usd_suffix,)
|
||||||
|
text = RE_CNY_PREFIX.sub(replace_cny_prefix, text)
|
||||||
|
text = RE_CNY_SUFFIX.sub(replace_cny_suffix, text)
|
||||||
|
text = RE_USD_SYMBOL.sub(replace_usd_symbol, text)
|
||||||
|
text = RE_USD_SUFFIX.sub(replace_usd_suffix, text)
|
||||||
for special_s, special_l, target_symbol in special:
|
for special_s, special_l, target_symbol in special:
|
||||||
if special_s in text and language == special_l:
|
if special_s in text and language == special_l:
|
||||||
return clean_special(text, language, special_s, target_symbol, version)
|
return clean_special(text, language, special_s, target_symbol, version)
|
||||||
|
@ -337,3 +337,116 @@ def num2str(value_string: str) -> str:
|
|||||||
result = result if result else "零"
|
result = result if result else "零"
|
||||||
result += "点" + verbalize_digit(decimal)
|
result += "点" + verbalize_digit(decimal)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# Yuan/yen sign in its halfwidth (U+00A5) and fullwidth (U+FFE5) forms. The two
# code points are spelled as escapes so that look-alike characters cannot be
# silently merged into one (which would leave the fullwidth sign unmatched).
# Group 1 is the numeric amount: optional minus sign, digits with optional
# thousands commas, optional decimal part.
RE_CNY_PREFIX = re.compile(r"[\u00A5\uFFE5]\s*(-?\d[\d,]*(?:\.\d+)?)")
# Same amount pattern, followed (after optional whitespace) by a currency word
# or sign.
RE_CNY_SUFFIX = re.compile(
    r"(-?\d[\d,]*(?:\.\d+)?)(?:\s*(?:人民币|元|CNY|cny|[\u00A5\uFFE5]))"
)
|
||||||
|
|
||||||
|
def _strip_commas(s: str) -> str:
|
||||||
|
return s.replace(",", "")
|
||||||
|
|
||||||
|
def _split_amount(amount: str):
    """Break a textual amount into sign, integer part and two fraction digits.

    Args:
        amount: Amount text such as ``"-1,234.5"``; a leading ``-`` marks a
            negative value and commas are discarded.

    Returns:
        A 4-tuple ``(neg, integer, frac, had_frac)``:
        ``neg`` is True when the text started with ``-``; ``integer`` is the
        text before the first ``.`` (``"0"`` if empty); ``frac`` is exactly
        two characters, right-padded with ``"0"`` or truncated; ``had_frac``
        is True when the text contained a ``.``.
    """
    is_negative = amount[:1] == "-"
    unsigned = amount[1:] if is_negative else amount
    digits = _strip_commas(unsigned) or "0"

    # partition() leaves the separator empty when there is no decimal point.
    whole, dot, decimals = digits.partition(".")

    two_places = decimals.ljust(2, "0")[:2]
    return is_negative, whole or "0", two_places, dot == "."
|
||||||
|
|
||||||
|
#人民币和美元的处理都在cleaner那边,防吞
|
||||||
|
def replace_cny_amount(amount: str, num2str) -> str:
    """Spell out a numeric amount as yuan / jiao / fen text.

    Args:
        amount: Amount text as accepted by ``_split_amount``.
        num2str: Callable applied to the digit strings (integer part, then
            each non-zero fraction digit); its return value is concatenated
            into the result. Presumably it yields the Chinese reading of the
            number.

    Returns:
        The integer part followed by "元" ("零元" when the integer part is
        "0"), then "角" / "分" pieces for non-zero fraction digits. When a
        decimal point was present but both fraction digits are zero, "整" is
        appended. Negative amounts are prefixed with "负".
    """
    negative, yuan, cents, explicit_fraction = _split_amount(amount)

    spoken = ("零" if yuan == "0" else num2str(yuan)) + "元"

    tenths, hundredths = cents[0], cents[1]
    if tenths == "0" and hundredths == "0":
        # "12.00" reads as a round amount; a bare "12" gets no suffix.
        if explicit_fraction:
            spoken += "整"
    else:
        if tenths != "0":
            spoken += num2str(tenths) + "角"
        if hundredths != "0":
            spoken += num2str(hundredths) + "分"

    # Do not double the sign if the text already starts with one.
    if negative and not spoken.startswith("负"):
        spoken = "负" + spoken
    return spoken
|
||||||
|
|
||||||
|
def replace_cny_prefix(m, num2str=num2str):
    """Substitution callback: read out the amount captured in group 1 as CNY.

    Args:
        m: Match object whose group 1 holds the numeric amount.
        num2str: Number-to-text callable forwarded to ``replace_cny_amount``.
    """
    amount_text = m.group(1)
    return replace_cny_amount(amount_text, num2str)
|
||||||
|
|
||||||
|
def replace_cny_suffix(m, num2str=num2str):
    """Substitution callback: read out the amount captured in group 1 as CNY.

    Args:
        m: Match object whose group 1 holds the numeric amount.
        num2str: Number-to-text callable forwarded to ``replace_cny_amount``.
    """
    amount_text = m.group(1)
    return replace_cny_amount(amount_text, num2str)
|
||||||
|
|
||||||
|
#我知道美元符也可能是加拿大元什么的,但是就当它美元吧whatever
|
||||||
|
# Dollar sign in its ASCII (U+0024) and fullwidth (U+FF04) forms. Both live in
# a character class: a bare `$` as a regex alternative is the end-of-input
# anchor, which would make any number at the end of the text look like a
# dollar amount.
# Group 1 is the numeric amount: optional minus sign, digits with optional
# thousands commas, optional decimal part.
RE_USD_SYMBOL = re.compile(r"[$\uFF04]\s*(-?\d[\d,]*(?:\.\d+)?)")
# Same amount pattern, followed (after optional whitespace) by a currency word
# or sign.
RE_USD_SUFFIX = re.compile(
    r"(-?\d[\d,]*(?:\.\d+)?)(?:\s*(?:美元|USD|usd|[$\uFF04]))"
)
|
||||||
|
|
||||||
|
def _strip_commas(s: str) -> str:
|
||||||
|
return s.replace(",", "")
|
||||||
|
|
||||||
|
def _split_amount(amount: str):
    """Break a textual amount into sign, integer part and two fraction digits.

    Args:
        amount: Amount text such as ``"-1,234.5"``; a leading ``-`` marks a
            negative value and commas are discarded.

    Returns:
        A 4-tuple ``(neg, integer, frac, had_frac)``:
        ``neg`` is True when the text started with ``-``; ``integer`` is the
        text before the first ``.`` (``"0"`` if empty); ``frac`` is exactly
        two characters, right-padded with ``"0"`` or truncated; ``had_frac``
        is True when the text contained a ``.``.
    """
    # NOTE(review): a function with this name and the same behavior is already
    # defined earlier in this module; this redefinition looks redundant.
    neg = amount[:1] == "-"
    body = _strip_commas(amount[1:] if neg else amount)
    if not body:
        body = "0"

    had_frac = "." in body
    if had_frac:
        integer, frac = body.split(".", 1)
    else:
        integer, frac = body, ""

    if not integer:
        integer = "0"
    # Keep only two decimal places; they are what gets read out as cents.
    frac = f"{frac}00"[:2]
    return neg, integer, frac, had_frac
|
||||||
|
|
||||||
|
def replace_usd_amount(amount: str, num2str) -> str:
    """Spell out a numeric amount as US dollars and cents.

    Args:
        amount: Amount text as accepted by ``_split_amount``.
        num2str: Callable applied to the digit strings (integer part, then
            each non-zero fraction digit); its return value is concatenated
            into the result. Presumably it yields the Chinese reading of the
            number.

    Returns:
        The integer part followed by "美元" (omitted when the integer part is
        "0" and there are cents), then the two fraction digits read as a
        number of "美分". When a decimal point was present but both fraction
        digits are zero, "整" is appended. A zero amount always reads
        "零美元" (plus "整" for an explicit zero fraction such as "0.00").
        Negative amounts are prefixed with "负".
    """
    neg, integer, frac, had_frac = _split_amount(amount)
    tens, ones = frac[0], frac[1]
    has_cents = tens != "0" or ones != "0"

    parts = []
    if integer != "0":
        parts.append(num2str(integer) + "美元")
    elif not has_cents:
        # Zero dollars and zero cents: emit the unit here so that "0.00" does
        # not collapse to a bare "整" below.
        parts.append("零美元")

    if has_cents:
        cents = ""
        if tens != "0":
            cents += num2str(tens) + "十"
        if ones != "0":
            cents += num2str(ones)
        # Read 1x as "十x" rather than "一十x".
        parts.append(cents.replace("一十", "十") + "美分")
    elif had_frac:
        parts.append("整")

    res = "".join(parts)
    # Do not double the sign if the text already starts with one.
    if neg and not res.startswith("负"):
        res = "负" + res
    return res
|
||||||
|
|
||||||
|
def replace_usd_symbol(m, num2str=num2str):
    """Substitution callback: read out the amount captured in group 1 as USD.

    Args:
        m: Match object whose group 1 holds the numeric amount.
        num2str: Number-to-text callable forwarded to ``replace_usd_amount``.
    """
    amount_text = m.group(1)
    return replace_usd_amount(amount_text, num2str)
|
||||||
|
|
||||||
|
def replace_usd_suffix(m, num2str=num2str):
    """Substitution callback: read out the amount captured in group 1 as USD.

    Args:
        m: Match object whose group 1 holds the numeric amount.
        num2str: Number-to-text callable forwarded to ``replace_usd_amount``.
    """
    amount_text = m.group(1)
    return replace_usd_amount(amount_text, num2str)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user