Merge branch 'RVC-Boss:main' into main

2026-06-05 13:58:18 +08:00 · 2024-03-19 14:48:48 +08:00 · 2024-03-19 14:48:48 +08:00 · 8efd0fee50
commit 8efd0fee50
parent 7b0aa5be64 b451372316
6 changed files with 118624 additions and 113674 deletions
--- a/GPT_SoVITS/text/cmudict.rep
+++ b/GPT_SoVITS/text/cmudict.rep
--- a/GPT_SoVITS/text/engdict_cache.pickle
+++ b/GPT_SoVITS/text/engdict_cache.pickle
--- a/GPT_SoVITS/text/english.py
+++ b/GPT_SoVITS/text/english.py
@ -1,6 +1,7 @@
 import pickle
 import os
 import re
+import wordsegment
 from g2p_en import G2p

 from string import punctuation
@ -12,7 +13,6 @@ CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
 CMU_DICT_FAST_PATH = os.path.join(current_file_path, "cmudict-fast.rep")
 CMU_DICT_HOT_PATH = os.path.join(current_file_path, "engdict-hot.rep")
 CACHE_PATH = os.path.join(current_file_path, "engdict_cache.pickle")
-_g2p = G2p()

 arpa = {
    "AH0",
@ -90,7 +90,7 @@ arpa = {


 def replace_phs(phs):
-    rep_map = {";": ",", ":": ",", "'": "-", '"': "-"}
+    rep_map = {"'": "-"}
    phs_new = []
    for ph in phs:
        if ph in symbols:
@ -112,7 +112,7 @@ def read_dict():
            if line_index >= start_line:
                line = line.strip()
                word_split = line.split("  ")
-                word = word_split[0]
+                word = word_split[0].lower()

                syllable_split = word_split[1].split(" - ")
                g2p_dict[word] = []
@ -132,16 +132,11 @@ def read_dict_new():
        line = f.readline()
        line_index = 1
        while line:
-            if line_index >= 49:
+            if line_index >= 57:
                line = line.strip()
                word_split = line.split("  ")
-                word = word_split[0]
-
-                syllable_split = word_split[1].split(" - ")
-                g2p_dict[word] = []
-                for syllable in syllable_split:
-                    phone_split = syllable.split(" ")
-                    g2p_dict[word].append(phone_split)
+                word = word_split[0].lower()
+                g2p_dict[word] = [word_split[1].split(" ")]

            line_index = line_index + 1
            line = f.readline()
@ -153,10 +148,9 @@ def read_dict_new():
            if line_index >= 0:
                line = line.strip()
                word_split = line.split(" ")
-                word = word_split[0]
+                word = word_split[0].lower()
                if word not in g2p_dict:
-                    g2p_dict[word] = []
-                    g2p_dict[word].append(word_split[1:])
+                    g2p_dict[word] = [word_split[1:]]

            line_index = line_index + 1
            line = f.readline()
@ -168,10 +162,9 @@ def read_dict_new():
            if line_index >= 0:
                line = line.strip()
                word_split = line.split(" ")
-                word = word_split[0]
-                #if word not in g2p_dict:
-                g2p_dict[word] = []
-                g2p_dict[word].append(word_split[1:])
+                word = word_split[0].lower()
+                # 自定义发音词直接覆盖字典
+                g2p_dict[word] = [word_split[1:]]

            line_index = line_index + 1
            line = f.readline()
@ -200,24 +193,66 @@ eng_dict = get_dict()

 def text_normalize(text):
    # TODO: full English text normalization
-    return text.replace(";", ",")
+    # Adapt Chinese punctuation, and punctuation for g2p_en, to ASCII marks
+    rep_map = {
+        "[;:：，；]": ",",  # semicolons, colons and the full-width comma all become ","
+        '["’]': "'",  # double quote and curly apostrophe become a plain apostrophe
+        "。": ".",
+        "！": "!",
+        "？": "?",
+    }
+    for p, r in rep_map.items():  # each key is used as a regex pattern, each value as its replacement
+        text = re.sub(p, r, text)
+
+    return text
+
+
+class en_G2p(G2p):
+    def __init__(self):
+        super().__init__()
+        # Initialise word segmentation
+        wordsegment.load()
+
+        # Extend the outdated dictionary: self.cmu is set to the result of get_dict()
+        self.cmu = get_dict()
+
+        # Drop a few abbreviations whose dictionary pronunciation is wrong
+        for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
+            del self.cmu[word.lower()]  # NOTE(review): presumably a plain dict, so a missing key would raise KeyError - confirm
+
+        # A standalone "A" is read "EY1" rather than "AH0"
+        self.cmu['a'] = [['EY1']]
+
+
+    def predict(self, word):
+        # Lower-case OOV words of length <= 3 are spelled out letter by letter
+        if (len(word) <= 3):
+            return [phone for w in word for phone in self(w)]
+
+        # Try to split off a possessive 's
+        if re.match(r"^([a-z]+)('s)$", word):
+            phone = self(word[:-2])
+            phone.extend(['Z'])  # NOTE(review): always appends "Z", whatever the preceding phone is
+            return phone
+
+        # Try word segmentation to cope with compound words
+        comps = wordsegment.segment(word.lower())
+
+        # Words that cannot be segmented are sent back to the parent class for prediction
+        if len(comps)==1:
+            return super().predict(word)
+
+        # Segmentable words are handled recursively, one component at a time
+        return [phone for comp in comps for phone in self(comp)]
+
+
+_g2p = en_G2p()  # shared module-level instance; g2p() calls it on the input text


 def g2p(text):
-    phones = []
-    words = re.split(r"([,;.\-\?\!\s+])", text)
-    for w in words:
-        if w.upper() in eng_dict:
-            phns = eng_dict[w.upper()]
-            for ph in phns:
-                phones += ph
-        else:
-            phone_list = list(filter(lambda p: p != " ", _g2p(w)))
-            for ph in phone_list:
-                if ph in arpa:
-                    phones.append(ph)
-                else:
-                    phones.append(ph)
+    # Run g2p_en on the whole text at once and drop symbols that are not valid arpa phones before returning
+    phone_list = _g2p(text)
+    phones = [ph if ph != "<unk>" else "UNK" for ph in phone_list if ph not in [" ", "<pad>", "UW", "</s>", "<s>"]]

    return replace_phs(phones)

--- a/docs/ko/Changelog_KO.md
+++ b/docs/ko/Changelog_KO.md
@ -89,6 +89,21 @@
 1. 참조 텍스트 입력을 지원합니다.
 2. 프론트엔드에 있던 중국어 텍스트 입력 버그를 수정하였습니다.

-todolist :
+### 20240221 업데이트

-1. 중국어 다음음자 추론 최적화
+1. 데이터 처리에 음성 노이즈 감소 옵션을 추가하였습니다. (노이즈 감소는 16k 샘플링률만 남기며, 노이즈가 크지 않다면 사용하지 마십시오.)
+2. 중국어 및 일본어 프론트엔드 처리를 최적화하였습니다. https://github.com/RVC-Boss/GPT-SoVITS/pull/559 https://github.com/RVC-Boss/GPT-SoVITS/pull/556 https://github.com/RVC-Boss/GPT-SoVITS/pull/532 https://github.com/RVC-Boss/GPT-SoVITS/pull/507 https://github.com/RVC-Boss/GPT-SoVITS/pull/509
+3. Mac에서 CPU 추론이 더 빨라졌으므로 추론 장치를 mps에서 CPU로 변경하였습니다.
+4. colab에서 공용 URL을 열지 않는 문제를 수정하였습니다.
+
+### 20240306 업데이트
+
+1. 추론 속도를 50% 빠르게 하였습니다. (RTX3090+pytorch2.2.1+cu11.8+win10+py39 테스트 완료) https://github.com/RVC-Boss/GPT-SoVITS/pull/672
+2. faster whisper를 사용할 때 중국어 ASR을 먼저 다운로드할 필요가 없습니다.
+3. uvr5의 잔향 제거 모델이 잔향이 있는지 여부를 반대로 반환하는 문제를 수정하였습니다.
+4. faster whisper가 CUDA를 사용할 수 없는 경우 자동으로 CPU 추론을 사용하도록 수정하였습니다.
+5. is_half의 판단을 수정하여 Mac에서 CPU 추론이 정상적으로 작동하도록 수정하였습니다.
+
+todolist:
+
+1. 중국어 다양한 발음 단어 추론 최적화(테스트 결과를 작성하시는 분은 pr 코멘트 영역에 작성해주시면 감사하겠습니다)
--- a/docs/ko/README.md
+++ b/docs/ko/README.md
@ -57,7 +57,7 @@ bash install.sh

 ### macOS

-**주의: Mac에서 GPU로 훈련된 모델은 다른 장치에서 훈련된 모델에 비해 현저히 낮은 품질을 나타내므로, 우리는 일시적으로 CPU를 사용하여 훈련하고 있습니다.**
+**주의: Mac에서 GPU로 훈련된 모델은 다른 OS에서 훈련된 모델에 비해 품질이 낮습니다. 해당 문제를 해결하기 전까지 macOS에서는 CPU를 사용하여 훈련을 진행합니다.**

 먼저 `brew install ffmpeg` 또는 `conda install ffmpeg`를 실행하여 FFmpeg가 설치되었는지 확인한 다음, 다음 명령어를 사용하여 설치하세요:

--- a/requirements.txt
+++ b/requirements.txt
@ -24,4 +24,5 @@ psutil
 jieba_fast
 jieba
 LangSegment>=0.2.0
-Faster_Whisper
+Faster_Whisper
+wordsegment