From 82b458625d83f8edaf9636166e29bf4056a6aae7 Mon Sep 17 00:00:00 2001 From: Ella Zhang <144317607+EllaZhangCA@users.noreply.github.com> Date: Tue, 23 Sep 2025 02:48:07 -0700 Subject: [PATCH] =?UTF-8?q?=E4=B8=BA=E4=B8=AD=E6=96=87=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BA=86=E8=B4=A7=E5=B8=81=E8=AE=A1=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPT_SoVITS/text/cleaner.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py index 7ba8f376..7c13023c 100644 --- a/GPT_SoVITS/text/cleaner.py +++ b/GPT_SoVITS/text/cleaner.py @@ -10,6 +10,7 @@ import os from text import symbols as symbols_v1 from text import symbols2 as symbols_v2 + special = [ # ("%", "zh", "SP"), ("¥", "zh", "SP2"), @@ -17,7 +18,6 @@ special = [ # ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧 ] - def clean_text(text, language, version=None): if version is None: version = os.environ.get("version", "v2") @@ -31,6 +31,14 @@ def clean_text(text, language, version=None): if language not in language_module_map: language = "en" text = " " + if language in ("zh"): #处理货币似乎只能这里截胡,不然货币符号会被吞 + from text.zh_normalization.num import ( + RE_CNY_PREFIX, RE_CNY_SUFFIX, replace_cny_prefix, replace_cny_suffix, + RE_USD_SYMBOL, RE_USD_SUFFIX, replace_usd_symbol, replace_usd_suffix,) + text = RE_CNY_PREFIX.sub(replace_cny_prefix, text) + text = RE_CNY_SUFFIX.sub(replace_cny_suffix, text) + text = RE_USD_SYMBOL.sub(replace_usd_symbol, text) + text = RE_USD_SUFFIX.sub(replace_usd_suffix, text) for special_s, special_l, target_symbol in special: if special_s in text and language == special_l: return clean_special(text, language, special_s, target_symbol, version)