From b2cff0cd0abd0ac134a16ae7a9695f88e8826104 Mon Sep 17 00:00:00 2001 From: SapphireLab <36986837+SapphireLab@users.noreply.github.com> Date: Tue, 16 Jun 2026 21:46:53 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=A4=9A=E9=9F=B3=E5=AD=97=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=20(#2791)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: 固定唑的读音 * add: 增加md5检测以更新缓存 --- GPT_SoVITS/text/g2pw/g2pw.py | 24 ++++++++++++++++++++++-- GPT_SoVITS/text/g2pw/polyphonic-fix.rep | 3 ++- GPT_SoVITS/text/g2pw/polyphonic.md5 | 1 + GPT_SoVITS/text/g2pw/polyphonic.pickle | Bin 1322387 -> 1322401 bytes 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 GPT_SoVITS/text/g2pw/polyphonic.md5 diff --git a/GPT_SoVITS/text/g2pw/g2pw.py b/GPT_SoVITS/text/g2pw/g2pw.py index 08525e91..fbfcd096 100644 --- a/GPT_SoVITS/text/g2pw/g2pw.py +++ b/GPT_SoVITS/text/g2pw/g2pw.py @@ -1,5 +1,6 @@ # This code is modified from https://github.com/mozillazg/pypinyin-g2pW +import hashlib import pickle import os @@ -14,6 +15,16 @@ current_file_path = os.path.dirname(__file__) CACHE_PATH = os.path.join(current_file_path, "polyphonic.pickle") PP_DICT_PATH = os.path.join(current_file_path, "polyphonic.rep") PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep") +MD5_PATH = os.path.join(current_file_path, "polyphonic.md5") + +def get_file_md5(file_path): + if not os.path.exists(file_path): + return "" + hasher = hashlib.md5() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hasher.update(chunk) + return hasher.hexdigest() class G2PWPinyin(Pinyin): @@ -115,13 +126,22 @@ def cache_dict(polyphonic_dict, file_path): def get_dict(): - if os.path.exists(CACHE_PATH): + new_md5 = get_file_md5(PP_DICT_PATH) + get_file_md5(PP_FIX_DICT_PATH) + old_md5 = "" + if os.path.exists(MD5_PATH): + with open(MD5_PATH, "r", encoding="utf-8") as f: + old_md5 = f.read().strip() + need_rebuild = (not os.path.exists(CACHE_PATH)) or (new_md5 != old_md5) + + if not need_rebuild: with open(CACHE_PATH, "rb") as pickle_file: polyphonic_dict = pickle.load(pickle_file) else: + print("Rebuilding Polyphonic Dictionary: " + f"{old_md5} -> {new_md5}") polyphonic_dict = read_dict() cache_dict(polyphonic_dict, CACHE_PATH) - + with open(MD5_PATH, "w", encoding="utf-8") as f: + f.write(new_md5) return polyphonic_dict diff --git a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep index 1800b886..51ebc145 100644 --- a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep +++ b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep @@ -45023,4 +45023,5 @@ 鼎铛玉石: ['ding3', 'cheng1', 'yu4', 'shi2'] 齿豁头童: ['chi3', 'huo1', 'tou2', 'tong2'] 牦牛: ['mao2', 'niu2'] -牦: ['mao2'] \ No newline at end of file +牦: ['mao2'] +唑: ['zuo4'] \ No newline at end of file diff --git a/GPT_SoVITS/text/g2pw/polyphonic.md5 b/GPT_SoVITS/text/g2pw/polyphonic.md5 new file mode 100644 index 00000000..24182cbb --- /dev/null +++ b/GPT_SoVITS/text/g2pw/polyphonic.md5 @@ -0,0 +1 @@ +13b2211c317c75794123ffdf7c2aea021c75b0c606ad61d7c8b05bb00b64fa21 \ No newline at end of file diff --git a/GPT_SoVITS/text/g2pw/polyphonic.pickle b/GPT_SoVITS/text/g2pw/polyphonic.pickle index fbe46ce1dcc498e4cc508f5a68e77bee3428a393..b749b06877d022e28bfa20da09560b8d0bb530d1 100644 GIT binary patch delta 72 zcmV-O0Jr~>+fbp~P=JI1gaU*Egam{Iga(8Mgb0KQgbIWUgbaiYgbsucgb;)ggc5`k eloa;?qP6%GnJf^D1Lc&FlwFi+FaZDnVRbI4M;R&r delta 58 zcmZ2@J7DtdfQA;v7N!>F7M2#)7Pc1l7LFFq7OocV7M>Q~7QPn#7J(MQ7NHj5DI(t) Or)>QpGE;}CR1W}P1{3rE