From 82b458625d83f8edaf9636166e29bf4056a6aae7 Mon Sep 17 00:00:00 2001
From: Ella Zhang <144317607+EllaZhangCA@users.noreply.github.com>
Date: Tue, 23 Sep 2025 02:48:07 -0700
Subject: [PATCH 1/3] =?UTF-8?q?=E4=B8=BA=E4=B8=AD=E6=96=87=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0=E4=BA=86=E8=B4=A7=E5=B8=81=E8=AE=A1=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 GPT_SoVITS/text/cleaner.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py
index 7ba8f376..7c13023c 100644
--- a/GPT_SoVITS/text/cleaner.py
+++ b/GPT_SoVITS/text/cleaner.py
@@ -10,6 +10,7 @@ import os
 from text import symbols as symbols_v1
 from text import symbols2 as symbols_v2
 
+
 special = [
     # ("%", "zh", "SP"),
     ("￥", "zh", "SP2"),
@@ -17,7 +18,6 @@ special = [
     # ('@', 'zh', "SP4")#不搞鬼畜了，和第二版保持一致吧
 ]
 
-
 def clean_text(text, language, version=None):
     if version is None:
         version = os.environ.get("version", "v2")
@@ -31,6 +31,14 @@ def clean_text(text, language, version=None):
     if language not in language_module_map:
         language = "en"
         text = " "
+    if language in ("zh"): #处理货币似乎只能这里截胡，不然货币符号会被吞
+        from text.zh_normalization.num import (
+            RE_CNY_PREFIX, RE_CNY_SUFFIX, replace_cny_prefix, replace_cny_suffix,
+            RE_USD_SYMBOL, RE_USD_SUFFIX, replace_usd_symbol, replace_usd_suffix,)
+        text = RE_CNY_PREFIX.sub(replace_cny_prefix, text)
+        text = RE_CNY_SUFFIX.sub(replace_cny_suffix, text)
+        text = RE_USD_SYMBOL.sub(replace_usd_symbol, text)
+        text = RE_USD_SUFFIX.sub(replace_usd_suffix, text)
     for special_s, special_l, target_symbol in special:
         if special_s in text and language == special_l:
             return clean_special(text, language, special_s, target_symbol, version)

From 0c02ebf5aeb9d6d6465d17eaf65d4451a2ad91ec Mon Sep 17 00:00:00 2001
From: Ella Zhang <144317607+EllaZhangCA@users.noreply.github.com>
Date: Tue, 23 Sep 2025 02:48:30 -0700
Subject: [PATCH 2/3] Add CNY/USD amount verbalization to zh_normalization/num.py

---
 GPT_SoVITS/text/zh_normalization/num.py | 109 ++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/GPT_SoVITS/text/zh_normalization/num.py b/GPT_SoVITS/text/zh_normalization/num.py
index 14d602b0..3aed785f 100644
--- a/GPT_SoVITS/text/zh_normalization/num.py
+++ b/GPT_SoVITS/text/zh_normalization/num.py
@@ -337,3 +337,112 @@ def num2str(value_string: str) -> str:
         result = result if result else "零"
         result += "点" + verbalize_digit(decimal)
     return result
+
+
+# Currency amounts are verbalized from text.cleaner.clean_text, before the
+# generic zh normalization runs, so the currency symbols are still present.
+_AMOUNT = r"(-?\d[\d,]*(?:\.\d+)?)"
+
+# The prefix patterns also consume a unit that follows the number ("¥100元"),
+# otherwise that unit would be left behind and read a second time.
+RE_CNY_PREFIX = re.compile(r"(?:¥|￥)\s*" + _AMOUNT + r"(?:\s*(?:人民币|元|CNY|cny))?")
+RE_CNY_SUFFIX = re.compile(_AMOUNT + r"(?:\s*(?:人民币|元|CNY|cny|¥|￥))")
+
+# "$" can also denote other dollars (CAD, ...); it is always read as US dollars.
+RE_USD_SYMBOL = re.compile(r"(?:\$|＄)\s*" + _AMOUNT + r"(?:\s*(?:美元|USD|usd))?")
+RE_USD_SUFFIX = re.compile(_AMOUNT + r"(?:\s*(?:美元|USD|usd|\$|＄))")
+
+
+def _strip_commas(s: str) -> str:
+    """Remove thousands separators from a numeric string."""
+    return s.replace(",", "")
+
+
+def _split_amount(amount: str):
+    """Split matched amount text into ``(neg, integer, frac, had_frac)``.
+
+    ``integer`` carries no sign, commas or leading zeros. ``frac`` is always
+    exactly two digits (padded or truncated) so it can be read as jiao/fen or
+    cents. ``had_frac`` tells whether the text contained a decimal point.
+    """
+    neg = amount.startswith("-")
+    if neg:
+        amount = amount[1:]
+    integer, dot, frac = _strip_commas(amount).partition(".")
+    integer = integer.lstrip("0") or "0"
+    return neg, integer, (frac + "00")[:2], bool(dot)
+
+
+def replace_cny_amount(amount: str, num2str) -> str:
+    """Verbalize a CNY amount as yuan / jiao / fen.
+
+    Args:
+        amount: Matched numeric text; may carry a leading "-" and commas.
+        num2str: Callable that turns a digit string into Chinese numerals.
+
+    Returns:
+        The spoken form, e.g. "1.50" gives "一元五角" with this module's num2str.
+    """
+    neg, integer, frac, had_frac = _split_amount(amount)
+    jiao, fen = frac
+    parts = [("零" if integer == "0" else num2str(integer)) + "元"]
+    if jiao != "0":
+        parts.append(num2str(jiao) + "角")
+    if fen != "0":
+        parts.append(num2str(fen) + "分")
+    if had_frac and frac == "00":
+        parts.append("整")  # "x.00" is read as a round amount
+    res = "".join(parts)
+    return "负" + res if neg else res
+
+
+def replace_cny_prefix(m, num2str=num2str):
+    """``re.sub`` callback for RE_CNY_PREFIX."""
+    return replace_cny_amount(m.group(1), num2str)
+
+
+def replace_cny_suffix(m, num2str=num2str):
+    """``re.sub`` callback for RE_CNY_SUFFIX."""
+    return replace_cny_amount(m.group(1), num2str)
+
+
+def replace_usd_amount(amount: str, num2str) -> str:
+    """Verbalize a USD amount as dollars and cents.
+
+    Args:
+        amount: Matched numeric text; may carry a leading "-" and commas.
+        num2str: Callable that turns a digit string into Chinese numerals.
+
+    Returns:
+        The spoken form. A zero dollar part is dropped when cents are read
+        ("0.25" gives only the cents), but a zero amount is still spoken
+        ("0.00" gives "零美元整", not a bare "整").
+    """
+    neg, integer, frac, had_frac = _split_amount(amount)
+    tens, ones = frac  # the two digits of the cents
+    has_cents = frac != "00"
+    parts = []
+    if integer != "0" or not has_cents:
+        parts.append(("零" if integer == "0" else num2str(integer)) + "美元")
+    if has_cents:
+        cents = ""
+        if tens != "0":
+            cents += num2str(tens) + "十"
+        if ones != "0":
+            cents += num2str(ones)
+        # "一十五" -> "十五": a leading "一" is not spoken before "十"
+        parts.append(cents.replace("一十", "十") + "美分")
+    elif had_frac:
+        parts.append("整")
+    res = "".join(parts)
+    return "负" + res if neg else res
+
+
+def replace_usd_symbol(m, num2str=num2str):
+    """``re.sub`` callback for RE_USD_SYMBOL."""
+    return replace_usd_amount(m.group(1), num2str)
+
+
+def replace_usd_suffix(m, num2str=num2str):
+    """``re.sub`` callback for RE_USD_SUFFIX."""
+    return replace_usd_amount(m.group(1), num2str)

From 8c0cb0d691554d8311d8904972ae3efa8bfd1cc4 Mon Sep 17 00:00:00 2001
From: Ella Zhang <144317607+EllaZhangCA@users.noreply.github.com>
Date: Tue, 23 Sep 2025 02:50:39 -0700
Subject: [PATCH 3/3] =?UTF-8?q?=E4=B8=BA=E4=B8=AD=E6=96=87=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0=E4=BA=86=E8=B4=A7=E5=B8=81=E8=AE=A1=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 GPT_SoVITS/text/cleaner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py
index 7c13023c..388b4e16 100644
--- a/GPT_SoVITS/text/cleaner.py
+++ b/GPT_SoVITS/text/cleaner.py
@@ -13,7 +13,7 @@ from text import symbols2 as symbols_v2
 
 special = [
     # ("%", "zh", "SP"),
-    ("￥", "zh", "SP2"),
+    # ("￥", "zh", "SP2"), #加了货币计数所以人民币符不是SP2了
     ("^", "zh", "SP3"),
     # ('@', 'zh', "SP4")#不搞鬼畜了，和第二版保持一致吧
 ]
@@ -31,7 +31,7 @@ def clean_text(text, language, version=None):
     if language not in language_module_map:
         language = "en"
         text = " "
-    if language in ("zh"): #处理货币似乎只能这里截胡，不然货币符号会被吞
+    if language == "zh":  # Verbalize currency here, before later cleaning can swallow the symbols.
         from text.zh_normalization.num import (
             RE_CNY_PREFIX, RE_CNY_SUFFIX, replace_cny_prefix, replace_cny_suffix,
             RE_USD_SYMBOL, RE_USD_SUFFIX, replace_usd_symbol, replace_usd_suffix,)