diff --git a/tools/my_utils.py b/tools/my_utils.py
index d26a372e..04f1a98a 100644
--- a/tools/my_utils.py
+++ b/tools/my_utils.py
@@ -109,7 +109,7 @@ def check_details(path_list=None, is_train=False, is_dataset_processing=False):
if os.path.exists(wav_path):
...
else:
- gr.Warning(wav_path+i18n("路径错误"))
+ gr.Warning(wav_path + i18n("路径错误"))
return
if is_train:
path_list.append(os.path.join(path_list[0], "2-name2text.txt"))
diff --git a/tools/subfix_webui.py b/tools/subfix_webui.py
index 3f2fd03e..51a7dfad 100644
--- a/tools/subfix_webui.py
+++ b/tools/subfix_webui.py
@@ -1,5 +1,6 @@
import sys
from tools.i18n.i18n import I18nAuto, scan_language_list
+
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
i18n = I18nAuto(language=language)
import argparse
@@ -309,7 +310,9 @@ if __name__ == "__main__":
with gr.Blocks(analytics_enabled=False) as demo:
gr.Markdown(
- value=i18n("Submit Text: 将当前页所有文本框内容手工保存到内存和文件(翻页前后或者退出标注页面前如果没点这个按钮,你再翻回来就回滚了,白忙活。)")
+ value=i18n(
+ "Submit Text: 将当前页所有文本框内容手工保存到内存和文件(翻页前后或者退出标注页面前如果没点这个按钮,你再翻回来就回滚了,白忙活。)"
+ )
)
with gr.Row():
btn_change_index = gr.Button("Change Index")
diff --git a/tools/uvr5/mdxnet.py b/tools/uvr5/mdxnet.py
index 6548ce24..98c75c1f 100644
--- a/tools/uvr5/mdxnet.py
+++ b/tools/uvr5/mdxnet.py
@@ -190,14 +190,14 @@ class Predictor:
opt_path_vocal = path_vocal[:-4] + ".%s" % format
opt_path_other = path_other[:-4] + ".%s" % format
if os.path.exists(path_vocal):
- os.system("ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path_vocal, opt_path_vocal))
+ os.system('ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path_vocal, opt_path_vocal))
if os.path.exists(opt_path_vocal):
try:
os.remove(path_vocal)
except:
pass
if os.path.exists(path_other):
- os.system("ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path_other, opt_path_other))
+ os.system('ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path_other, opt_path_other))
if os.path.exists(opt_path_other):
try:
os.remove(path_other)
diff --git a/tools/uvr5/vr.py b/tools/uvr5/vr.py
index 8f24ca6a..45429cca 100644
--- a/tools/uvr5/vr.py
+++ b/tools/uvr5/vr.py
@@ -140,7 +140,7 @@ class AudioPre:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
- cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
+ cmd = 'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
@@ -177,7 +177,7 @@ class AudioPre:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
- cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
+ cmd = 'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
@@ -307,7 +307,7 @@ class AudioPreDeEcho:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
- cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
+ cmd = 'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
@@ -340,7 +340,7 @@ class AudioPreDeEcho:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
- cmd="ffmpeg -i \"%s\" -vn \"%s\" -q:a 2 -y" % (path, opt_format_path)
+ cmd = 'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
print(cmd)
os.system(cmd)
if os.path.exists(opt_format_path):
diff --git a/webui.py b/webui.py
index 0e34987a..9981cfcc 100644
--- a/webui.py
+++ b/webui.py
@@ -507,7 +507,7 @@ def open1Ba(
):
global p_train_SoVITS
if p_train_SoVITS == None:
- exp_name=exp_name.rstrip(" ")
+ exp_name = exp_name.rstrip(" ")
config_file = (
"GPT_SoVITS/configs/s2.json"
if version not in {"v2Pro", "v2ProPlus"}
@@ -604,7 +604,7 @@ def open1Bb(
):
global p_train_GPT
if p_train_GPT == None:
- exp_name=exp_name.rstrip(" ")
+ exp_name = exp_name.rstrip(" ")
with open(
"GPT_SoVITS/configs/s1longer.yaml" if version == "v1" else "GPT_SoVITS/configs/s1longer-v2.yaml"
) as f:
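The hunks above only change quoting style, but the pattern they touch — interpolating file paths into a shell string for `os.system` — is easier to see in isolation. Below is a minimal sketch of the same ffmpeg conversion using `subprocess.run` with an argument list, which avoids shell quoting altogether; this is an illustrative alternative, not what the patch changes, and `convert_to_format` is a hypothetical helper name.

```python
import os
import subprocess

def convert_to_format(path: str, fmt: str) -> str:
    """Re-encode an audio file with ffmpeg, mirroring the `-vn -q:a 2 -y` flags above."""
    opt_path = path[:-4] + ".%s" % fmt
    # An argument list needs no manual quoting, even for paths containing spaces.
    subprocess.run(["ffmpeg", "-i", path, "-vn", "-q:a", "2", "-y", opt_path], check=True)
    if os.path.exists(opt_path):
        try:
            os.remove(path)  # drop the source file once conversion succeeded, as the original code does
        except OSError:
            pass
    return opt_path
```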
From d46c069e52caa0d75b5d1119b7dd918322e7b590 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Wed, 18 Jun 2025 10:38:54 +0800
Subject: [PATCH 02/27] Remove Debug Code (#2471)
---
install.sh | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/install.sh b/install.sh
index e6112bd3..c3d20852 100644
--- a/install.sh
+++ b/install.sh
@@ -33,7 +33,7 @@ on_error() {
run_conda_quiet() {
local output
- output=$(conda install --yes --quiet "$@" 2>&1) || {
+ output=$(conda install --yes --quiet -c conda-forge "$@" 2>&1) || {
echo -e "${ERROR} Conda install failed:\n$output"
exit 1
}
@@ -60,8 +60,6 @@ if ! command -v conda &>/dev/null; then
exit 1
fi
-run_conda_quiet gcc
-
USE_CUDA=false
USE_ROCM=false
USE_CPU=false
From 4987df5a71696cb1333fc4c2108cc3491c4e64f5 Mon Sep 17 00:00:00 2001
From: Ella Zhang <144317607+EllaZhangCA@users.noreply.github.com>
Date: Thu, 19 Jun 2025 15:34:11 +0800
Subject: [PATCH 03/27] fixed syntax errors in api_v2.py (#2473)
---
api_v2.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/api_v2.py b/api_v2.py
index 87082074..5947df53 100644
--- a/api_v2.py
+++ b/api_v2.py
@@ -33,14 +33,14 @@ POST:
"text_split_method": "cut0", # str. text split method, see text_segmentation_method.py for details.
"batch_size": 1, # int. batch size for inference
"batch_threshold": 0.75, # float. threshold for batch splitting.
- "split_bucket: True, # bool. whether to split the batch into multiple buckets.
+ "split_bucket": True, # bool. whether to split the batch into multiple buckets.
"speed_factor":1.0, # float. control the speed of the synthesized audio.
"streaming_mode": False, # bool. whether to return a streaming response.
"seed": -1, # int. random seed for reproducibility.
"parallel_infer": True, # bool. whether to use parallel inference.
- "repetition_penalty": 1.35 # float. repetition penalty for T2S model.
+ "repetition_penalty": 1.35, # float. repetition penalty for T2S model.
"sample_steps": 32, # int. number of sampling steps for VITS model V3.
- "super_sampling": False, # bool. whether to use super-sampling for audio when using VITS model V3.
+ "super_sampling": False # bool. whether to use super-sampling for audio when using VITS model V3.
}
```
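Patch 03 only repairs the JSON syntax in this docstring, but the corrected body is easiest to sanity-check as an actual payload. A minimal client sketch follows; the host, port, `/tts` route, and the `text`/`ref_audio_path`-style fields are assumptions for illustration and are not shown in this excerpt.

```python
import json

import requests  # assumption: a plain HTTP client is available

# Corrected request body from the docstring above; the first few fields are
# assumed required parameters that the excerpt does not show.
payload = {
    "text": "Hello, world.",
    "text_lang": "en",
    "ref_audio_path": "ref.wav",
    "prompt_text": "reference transcript",
    "prompt_lang": "en",
    "text_split_method": "cut0",
    "batch_size": 1,
    "batch_threshold": 0.75,
    "split_bucket": True,
    "speed_factor": 1.0,
    "streaming_mode": False,
    "seed": -1,
    "parallel_infer": True,
    "repetition_penalty": 1.35,
    "sample_steps": 32,
    "super_sampling": False,
}
json.loads(json.dumps(payload))  # round-trips cleanly now that the syntax errors are fixed

# Hypothetical endpoint; adjust host, port and route to how api_v2.py is launched.
resp = requests.post("http://127.0.0.1:9880/tts", json=payload)
```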
From 37f5abfcb4a6553652235909db2e124b6f8ff3a5 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Wed, 25 Jun 2025 14:52:27 +0800
Subject: [PATCH 04/27] Fix Issues with libstdcxx and conda sysroot (#2482)
---
install.sh | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/install.sh b/install.sh
index c3d20852..ea8d2e2d 100644
--- a/install.sh
+++ b/install.sh
@@ -170,7 +170,13 @@ if ! $USE_HF && ! $USE_HF_MIRROR && ! $USE_MODELSCOPE; then
exit 1
fi
-# 安装构建工具
+case "$(uname -m)" in
+ x86_64|amd64) SYSROOT_PKG="sysroot_linux-64>=2.28" ;;
+ aarch64|arm64) SYSROOT_PKG="sysroot_linux-aarch64>=2.28" ;;
+ ppc64le) SYSROOT_PKG="sysroot_linux-ppc64le>=2.28" ;;
+ *) echo "Unsupported architecture: $(uname -m)"; exit 1 ;;
+esac
+
# Install build tools
echo -e "${INFO}Detected system: $(uname -s) $(uname -r) $(uname -m)"
if [ "$(uname)" != "Darwin" ]; then
@@ -178,10 +184,14 @@ if [ "$(uname)" != "Darwin" ]; then
if [ "$gcc_major_version" -lt 11 ]; then
echo -e "${INFO}Installing GCC & G++..."
run_conda_quiet gcc=11 gxx=11
+ run_conda_quiet "$SYSROOT_PKG"
echo -e "${SUCCESS}GCC & G++ Installed..."
else
echo -e "${INFO}Detected GCC Version: $gcc_major_version"
echo -e "${INFO}Skip Installing GCC & G++ From Conda-Forge"
+ echo -e "${INFO}Installing libstdcxx-ng From Conda-Forge"
+ run_conda_quiet "libstdcxx-ng>=$gcc_major_version"
+ echo -e "${SUCCESS}libstdcxx-ng=$gcc_major_version Installed..."
fi
else
if ! xcode-select -p &>/dev/null; then
From 4839e8214862808bea45b86c7c26ff643b0175ee Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Fri, 27 Jun 2025 01:04:18 +0800
Subject: [PATCH 05/27] Add Windows Install Powershell Scripts (#2487)
---
README.md | 8 ++
docs/cn/README.md | 6 ++
docs/ko/README.md | 6 ++
docs/tr/README.md | 6 ++
install.ps1 | 241 ++++++++++++++++++++++++++++++++++++++++++++++
install.sh | 28 +++---
6 files changed, 283 insertions(+), 12 deletions(-)
create mode 100644 install.ps1
diff --git a/README.md b/README.md
index 978bb3ca..e67288ea 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,14 @@ If you are a Windows user (tested with win>=10), you can [download the integrate
**Users in China can [download the package here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#KTvnO).**
+Install the program by running the following commands:
+
+```pwsh
+conda create -n GPTSoVits python=3.10
+conda activate GPTSoVits
+pwsh -F install.ps1 --Device --Source [--DownloadUVR5]
+```
+
### Linux
```bash
diff --git a/docs/cn/README.md b/docs/cn/README.md
index 70ce4285..77841b99 100644
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@@ -62,6 +62,12 @@
**中国地区的用户可以[在此处下载整合包](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#KTvnO).**
+```pwsh
+conda create -n GPTSoVits python=3.10
+conda activate GPTSoVits
+pwsh -F install.ps1 --Device --Source [--DownloadUVR5]
+```
+
### Linux
```bash
diff --git a/docs/ko/README.md b/docs/ko/README.md
index bfc70395..1028c00a 100644
--- a/docs/ko/README.md
+++ b/docs/ko/README.md
@@ -58,6 +58,12 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-
Windows 사용자라면 (win>=10에서 테스트됨), [통합 패키지를 다운로드](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-v3lora-20250228.7z?download=true)한 후 압축을 풀고 _go-webui.bat_ 파일을 더블 클릭하면 GPT-SoVITS-WebUI를 시작할 수 있습니다.
+```pwsh
+conda create -n GPTSoVits python=3.10
+conda activate GPTSoVits
+pwsh -F install.ps1 --Device --Source [--DownloadUVR5]
+```
+
### Linux
```bash
diff --git a/docs/tr/README.md b/docs/tr/README.md
index 2ce02b60..dd5d79b3 100644
--- a/docs/tr/README.md
+++ b/docs/tr/README.md
@@ -58,6 +58,12 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-
Eğer bir Windows kullanıcısıysanız (win>=10 ile test edilmiştir), [entegre paketi indirin](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-v3lora-20250228.7z?download=true) ve _go-webui.bat_ dosyasına çift tıklayarak GPT-SoVITS-WebUI'yi başlatın.
+```pwsh
+conda create -n GPTSoVits python=3.10
+conda activate GPTSoVits
+pwsh -F install.ps1 --Device --Source [--DownloadUVR5]
+```
+
### Linux
```bash
diff --git a/install.ps1 b/install.ps1
new file mode 100644
index 00000000..9c33ace8
--- /dev/null
+++ b/install.ps1
@@ -0,0 +1,241 @@
+Param (
+ [Parameter(Mandatory=$true)][ValidateSet("CU126", "CU128", "CPU")][string]$Device,
+ [Parameter(Mandatory=$true)][ValidateSet("HF", "HF-Mirror", "ModelScope")][string]$Source,
+ [switch]$DownloadUVR5
+)
+
+$global:ErrorActionPreference = 'Stop'
+
+trap {
+ Write-ErrorLog $_
+}
+
+function Write-ErrorLog {
+ param (
+ [System.Management.Automation.ErrorRecord]$ErrorRecord
+ )
+
+ Write-Host "`n[ERROR] Command failed:" -ForegroundColor Red
+ if (-not $ErrorRecord.Exception.Message){
+ } else {
+ Write-Host "Message:" -ForegroundColor Red
+ $ErrorRecord.Exception.Message -split "`n" | ForEach-Object {
+ Write-Host " $_"
+ }
+ }
+
+ Write-Host "Command:" -ForegroundColor Red -NoNewline
+ Write-Host " $($ErrorRecord.InvocationInfo.Line)".Replace("`r", "").Replace("`n", "")
+ Write-Host "Location:" -ForegroundColor Red -NoNewline
+ Write-Host " $($ErrorRecord.InvocationInfo.ScriptName):$($ErrorRecord.InvocationInfo.ScriptLineNumber)"
+ Write-Host "Call Stack:" -ForegroundColor DarkRed
+ $ErrorRecord.ScriptStackTrace -split "`n" | ForEach-Object {
+ Write-Host " $_" -ForegroundColor DarkRed
+ }
+
+ exit 1
+}
+
+function Write-Info($msg) {
+ Write-Host "[INFO]:" -ForegroundColor Green -NoNewline
+ Write-Host " $msg"
+}
+function Write-Success($msg) {
+ Write-Host "[SUCCESS]:" -ForegroundColor Blue -NoNewline
+ Write-Host " $msg"
+}
+
+
+function Invoke-Conda {
+ param (
+ [Parameter(ValueFromRemainingArguments = $true)]
+ [string[]]$Args
+ )
+
+ $output = & conda install -y -q -c conda-forge @Args 2>&1
+ $exitCode = $LASTEXITCODE
+
+ if ($exitCode -ne 0) {
+ Write-Host "Conda Install $Args Failed" -ForegroundColor Red
+ $errorMessages = @()
+ foreach ($item in $output) {
+ if ($item -is [System.Management.Automation.ErrorRecord]) {
+ $msg = $item.Exception.Message
+ Write-Host "$msg" -ForegroundColor Red
+ $errorMessages += $msg
+ }
+ else {
+ Write-Host $item
+ $errorMessages += $item
+ }
+ }
+ throw [System.Exception]::new(($errorMessages -join "`n"))
+ }
+}
+
+function Invoke-Pip {
+ param (
+ [Parameter(ValueFromRemainingArguments = $true)]
+ [string[]]$Args
+ )
+
+ $output = & pip install @Args 2>&1
+ $exitCode = $LASTEXITCODE
+
+ if ($exitCode -ne 0) {
+ $errorMessages = @()
+ Write-Host "Pip Install $Args Failed" -ForegroundColor Red
+ foreach ($item in $output) {
+ if ($item -is [System.Management.Automation.ErrorRecord]) {
+ $msg = $item.Exception.Message
+ Write-Host "$msg" -ForegroundColor Red
+ $errorMessages += $msg
+ }
+ else {
+ Write-Host $item
+ $errorMessages += $item
+ }
+ }
+ throw [System.Exception]::new(($errorMessages -join "`n"))
+ }
+}
+
+function Invoke-Download {
+ param (
+ [Parameter(Mandatory = $true)]
+ [string]$Uri,
+
+ [Parameter()]
+ [string]$OutFile
+ )
+
+ try {
+ $params = @{
+ Uri = $Uri
+ }
+
+ if ($OutFile) {
+ $params["OutFile"] = $OutFile
+ }
+
+ $null = Invoke-WebRequest @params -ErrorAction Stop
+
+ } catch {
+ Write-Host "Failed to download:" -ForegroundColor Red
+ Write-Host " $Uri"
+ throw
+ }
+}
+
+function Invoke-Unzip {
+ param($ZipPath, $DestPath)
+ Expand-Archive -Path $ZipPath -DestinationPath $DestPath -Force
+ Remove-Item $ZipPath -Force
+}
+
+chcp 65001
+Set-Location $PSScriptRoot
+
+Write-Info "Installing FFmpeg & CMake..."
+Invoke-Conda ffmpeg cmake
+Write-Success "FFmpeg & CMake Installed"
+
+$PretrainedURL = ""
+$G2PWURL = ""
+$UVR5URL = ""
+$NLTKURL = ""
+$OpenJTalkURL = ""
+
+switch ($Source) {
+ "HF" {
+ Write-Info "Download Model From HuggingFace"
+ $PretrainedURL = "https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/pretrained_models.zip"
+ $G2PWURL = "https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/G2PWModel.zip"
+ $UVR5URL = "https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/uvr5_weights.zip"
+ $NLTKURL = "https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/nltk_data.zip"
+ $OpenJTalkURL = "https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/open_jtalk_dic_utf_8-1.11.tar.gz"
+ }
+ "HF-Mirror" {
+ Write-Info "Download Model From HuggingFace-Mirror"
+ $PretrainedURL = "https://hf-mirror.com/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/pretrained_models.zip"
+ $G2PWURL = "https://hf-mirror.com/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/G2PWModel.zip"
+ $UVR5URL = "https://hf-mirror.com/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/uvr5_weights.zip"
+ $NLTKURL = "https://hf-mirror.com/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/nltk_data.zip"
+ $OpenJTalkURL = "https://hf-mirror.com/XXXXRT/GPT-SoVITS-Pretrained/resolve/main/open_jtalk_dic_utf_8-1.11.tar.gz"
+ }
+ "ModelScope" {
+ Write-Info "Download Model From ModelScope"
+ $PretrainedURL = "https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master/pretrained_models.zip"
+ $G2PWURL = "https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master/G2PWModel.zip"
+ $UVR5URL = "https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master/uvr5_weights.zip"
+ $NLTKURL = "https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master/nltk_data.zip"
+ $OpenJTalkURL = "https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master/open_jtalk_dic_utf_8-1.11.tar.gz"
+ }
+}
+
+if (-not (Test-Path "GPT_SoVITS/pretrained_models/sv")) {
+ Write-Info "Downloading Pretrained Models..."
+ Invoke-Download -Uri $PretrainedURL -OutFile "pretrained_models.zip"
+ Invoke-Unzip "pretrained_models.zip" "GPT_SoVITS"
+ Write-Success "Pretrained Models Downloaded"
+} else {
+ Write-Info "Pretrained Model Exists"
+ Write-Info "Skip Downloading Pretrained Models"
+}
+
+
+if (-not (Test-Path "GPT_SoVITS/text/G2PWModel")) {
+ Write-Info "Downloading G2PWModel..."
+ Invoke-Download -Uri $G2PWURL -OutFile "G2PWModel.zip"
+ Invoke-Unzip "G2PWModel.zip" "GPT_SoVITS/text"
+ Write-Success "G2PWModel Downloaded"
+} else {
+ Write-Info "G2PWModel Exists"
+ Write-Info "Skip Downloading G2PWModel"
+}
+
+if ($DownloadUVR5) {
+ if (-not (Test-Path "tools/uvr5/uvr5_weights")) {
+ Write-Info "Downloading UVR5 Models..."
+ Invoke-Download -Uri $UVR5URL -OutFile "uvr5_weights.zip"
+ Invoke-Unzip "uvr5_weights.zip" "tools/uvr5"
+ Write-Success "UVR5 Models Downloaded"
+ } else {
+ Write-Info "UVR5 Models Exists"
+ Write-Info "Skip Downloading UVR5 Models"
+ }
+}
+
+switch ($Device) {
+ "CU128" {
+ Write-Info "Installing PyTorch For CUDA 12.8..."
+ Invoke-Pip torch torchaudio --index-url "https://download.pytorch.org/whl/cu128"
+ }
+ "CU126" {
+ Write-Info "Installing PyTorch For CUDA 12.6..."
+ Invoke-Pip torch torchaudio --index-url "https://download.pytorch.org/whl/cu126"
+ }
+ "CPU" {
+ Write-Info "Installing PyTorch For CPU..."
+ Invoke-Pip torch torchaudio --index-url "https://download.pytorch.org/whl/cpu"
+ }
+}
+Write-Success "PyTorch Installed"
+
+Write-Info "Installing Python Dependencies From requirements.txt..."
+Invoke-Pip -r extra-req.txt --no-deps
+Invoke-Pip -r requirements.txt
+Write-Success "Python Dependencies Installed"
+
+Write-Info "Downloading NLTK Data..."
+Invoke-Download -Uri $NLTKURL -OutFile "nltk_data.zip"
+Invoke-Unzip "nltk_data.zip" (python -c "import sys; print(sys.prefix)").Trim()
+
+Write-Info "Downloading Open JTalk Dict..."
+Invoke-Download -Uri $OpenJTalkURL -OutFile "open_jtalk_dic_utf_8-1.11.tar.gz"
+$target = (python -c "import os, pyopenjtalk; print(os.path.dirname(pyopenjtalk.__file__))").Trim()
+tar -xzf open_jtalk_dic_utf_8-1.11.tar.gz -C $target
+Remove-Item "open_jtalk_dic_utf_8-1.11.tar.gz" -Force
+Write-Success "Open JTalk Dic Downloaded"
+
+Write-Success "Installation Completed"
diff --git a/install.sh b/install.sh
index ea8d2e2d..a2fa751e 100644
--- a/install.sh
+++ b/install.sh
@@ -48,11 +48,12 @@ run_pip_quiet() {
}
run_wget_quiet() {
- local output
- output=$(wget --tries=25 --wait=5 --read-timeout=40 --retry-on-http-error=404 "$@" 2>&1) || {
- echo -e "${ERROR} Wget failed:\n$output"
+ if wget --tries=25 --wait=5 --read-timeout=40 -q --show-progress "$@" 2>&1; then
+ tput cuu1 && tput el
+ else
+ echo -e "${ERROR} Wget failed"
exit 1
- }
+ fi
}
if ! command -v conda &>/dev/null; then
@@ -171,10 +172,13 @@ if ! $USE_HF && ! $USE_HF_MIRROR && ! $USE_MODELSCOPE; then
fi
case "$(uname -m)" in
- x86_64|amd64) SYSROOT_PKG="sysroot_linux-64>=2.28" ;;
- aarch64|arm64) SYSROOT_PKG="sysroot_linux-aarch64>=2.28" ;;
- ppc64le) SYSROOT_PKG="sysroot_linux-ppc64le>=2.28" ;;
- *) echo "Unsupported architecture: $(uname -m)"; exit 1 ;;
+x86_64 | amd64) SYSROOT_PKG="sysroot_linux-64>=2.28" ;;
+aarch64 | arm64) SYSROOT_PKG="sysroot_linux-aarch64>=2.28" ;;
+ppc64le) SYSROOT_PKG="sysroot_linux-ppc64le>=2.28" ;;
+*)
+ echo "Unsupported architecture: $(uname -m)"
+ exit 1
+ ;;
esac
# Install build tools
@@ -248,10 +252,7 @@ elif [ "$USE_MODELSCOPE" = "true" ]; then
PYOPENJTALK_URL="https://www.modelscope.cn/models/XXXXRT/GPT-SoVITS-Pretrained/resolve/master/open_jtalk_dic_utf_8-1.11.tar.gz"
fi
-if find -L "GPT_SoVITS/pretrained_models" -mindepth 1 ! -name '.gitignore' | grep -q .; then
- echo -e "${INFO}Pretrained Model Exists"
- echo -e "${INFO}Skip Downloading Pretrained Models"
-else
+if [ ! -d "GPT_SoVITS/pretrained_models/sv" ]; then
echo -e "${INFO}Downloading Pretrained Models..."
rm -rf pretrained_models.zip
run_wget_quiet "$PRETRINED_URL"
@@ -259,6 +260,9 @@ else
unzip -q -o pretrained_models.zip -d GPT_SoVITS
rm -rf pretrained_models.zip
echo -e "${SUCCESS}Pretrained Models Downloaded"
+else
+ echo -e "${INFO}Pretrained Model Exists"
+ echo -e "${INFO}Skip Downloading Pretrained Models"
fi
if [ ! -d "GPT_SoVITS/text/G2PWModel" ]; then
From 90ebefa78fd544da36eebe0b2003620879c921b0 Mon Sep 17 00:00:00 2001
From: KamioRinn <63162909+KamioRinn@users.noreply.github.com>
Date: Fri, 27 Jun 2025 10:41:52 +0800
Subject: [PATCH 06/27] make sure ort providers available (#2489)
---
GPT_SoVITS/text/g2pw/onnx_api.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/GPT_SoVITS/text/g2pw/onnx_api.py b/GPT_SoVITS/text/g2pw/onnx_api.py
index 52eed443..1d5e4231 100644
--- a/GPT_SoVITS/text/g2pw/onnx_api.py
+++ b/GPT_SoVITS/text/g2pw/onnx_api.py
@@ -93,13 +93,13 @@ class G2PWOnnxConverter:
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
sess_options.intra_op_num_threads = 2 if torch.cuda.is_available() else 0
- try:
+ if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
self.session_g2pW = onnxruntime.InferenceSession(
os.path.join(uncompress_path, "g2pW.onnx"),
sess_options=sess_options,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
- except:
+ else:
self.session_g2pW = onnxruntime.InferenceSession(
os.path.join(uncompress_path, "g2pW.onnx"),
sess_options=sess_options,
providers=["CPUExecutionProvider"],
)
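The replacement above swaps a bare `try/except` around session creation for an explicit provider-availability check. A standalone sketch of that pattern, assuming a local `g2pW.onnx` path (the real code resolves it under `uncompress_path`):

```python
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL

# Prefer CUDA only when onnxruntime actually ships the provider; otherwise fall
# back to CPU instead of relying on an exception from session creation.
if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
else:
    providers = ["CPUExecutionProvider"]

session = onnxruntime.InferenceSession(
    "g2pW.onnx",  # assumed local path for illustration
    sess_options=sess_options,
    providers=providers,
)
```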
From 6df61f58e4d18d4c2ad9d1eddd6a1bd690034c23 Mon Sep 17 00:00:00 2001
From: KamioRinn <63162909+KamioRinn@users.noreply.github.com>
Date: Fri, 27 Jun 2025 11:58:41 +0800
Subject: [PATCH 07/27] Language segmentation and formatting optimization (#2488)
* better LangSegmenter
* add version num2str
* better version num2str
* sync fast infer
* sync api
* remove duplicate spaces
* remove unnecessary code
---------
Co-authored-by: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
---
GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py | 126 +++++------
GPT_SoVITS/inference_webui.py | 121 +++++-----
.../text/LangSegmenter/langsegmenter.py | 209 +++++++++++-------
GPT_SoVITS/text/chinese.py | 14 --
GPT_SoVITS/text/chinese2.py | 14 --
GPT_SoVITS/text/zh_normalization/num.py | 24 +-
.../zh_normalization/text_normlization.py | 3 +
api.py | 117 +++++-----
8 files changed, 332 insertions(+), 296 deletions(-)
diff --git a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
index 9a478d43..283e91c3 100644
--- a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
+++ b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
@@ -121,71 +121,67 @@ class TextPreprocessor:
def get_phones_and_bert(self, text: str, language: str, version: str, final: bool = False):
with self.bert_lock:
- if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
- # language = language.replace("all_","")
- formattext = text
- while " " in formattext:
- formattext = formattext.replace(" ", " ")
- if language == "all_zh":
- if re.search(r"[A-Za-z]", formattext):
- formattext = re.sub(r"[a-z]", lambda x: x.group(0).upper(), formattext)
- formattext = chinese.mix_text_normalize(formattext)
- return self.get_phones_and_bert(formattext, "zh", version)
+ text = re.sub(r' {2,}', ' ', text)
+ textlist = []
+ langlist = []
+ if language == "all_zh":
+ for tmp in LangSegmenter.getTexts(text,"zh"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_yue":
+ for tmp in LangSegmenter.getTexts(text,"zh"):
+ if tmp["lang"] == "zh":
+ tmp["lang"] = "yue"
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_ja":
+ for tmp in LangSegmenter.getTexts(text,"ja"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_ko":
+ for tmp in LangSegmenter.getTexts(text,"ko"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "en":
+ langlist.append("en")
+ textlist.append(text)
+ elif language == "auto":
+ for tmp in LangSegmenter.getTexts(text):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "auto_yue":
+ for tmp in LangSegmenter.getTexts(text):
+ if tmp["lang"] == "zh":
+ tmp["lang"] = "yue"
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ else:
+ for tmp in LangSegmenter.getTexts(text):
+ if langlist:
+ if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
+ textlist[-1] += tmp["text"]
+ continue
+ if tmp["lang"] == "en":
+ langlist.append(tmp["lang"])
else:
- phones, word2ph, norm_text = self.clean_text_inf(formattext, language, version)
- bert = self.get_bert_feature(norm_text, word2ph).to(self.device)
- elif language == "all_yue" and re.search(r"[A-Za-z]", formattext):
- formattext = re.sub(r"[a-z]", lambda x: x.group(0).upper(), formattext)
- formattext = chinese.mix_text_normalize(formattext)
- return self.get_phones_and_bert(formattext, "yue", version)
- else:
- phones, word2ph, norm_text = self.clean_text_inf(formattext, language, version)
- bert = torch.zeros(
- (1024, len(phones)),
- dtype=torch.float32,
- ).to(self.device)
- elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
- textlist = []
- langlist = []
- if language == "auto":
- for tmp in LangSegmenter.getTexts(text):
- langlist.append(tmp["lang"])
- textlist.append(tmp["text"])
- elif language == "auto_yue":
- for tmp in LangSegmenter.getTexts(text):
- if tmp["lang"] == "zh":
- tmp["lang"] = "yue"
- langlist.append(tmp["lang"])
- textlist.append(tmp["text"])
- else:
- for tmp in LangSegmenter.getTexts(text):
- if langlist:
- if (tmp["lang"] == "en" and langlist[-1] == "en") or (
- tmp["lang"] != "en" and langlist[-1] != "en"
- ):
- textlist[-1] += tmp["text"]
- continue
- if tmp["lang"] == "en":
- langlist.append(tmp["lang"])
- else:
- # 因无法区别中日韩文汉字,以用户输入为准
- langlist.append(language)
- textlist.append(tmp["text"])
- # print(textlist)
- # print(langlist)
- phones_list = []
- bert_list = []
- norm_text_list = []
- for i in range(len(textlist)):
- lang = langlist[i]
- phones, word2ph, norm_text = self.clean_text_inf(textlist[i], lang, version)
- bert = self.get_bert_inf(phones, word2ph, norm_text, lang)
- phones_list.append(phones)
- norm_text_list.append(norm_text)
- bert_list.append(bert)
- bert = torch.cat(bert_list, dim=1)
- phones = sum(phones_list, [])
- norm_text = "".join(norm_text_list)
+ # 因无法区别中日韩文汉字,以用户输入为准
+ langlist.append(language)
+ textlist.append(tmp["text"])
+ # print(textlist)
+ # print(langlist)
+ phones_list = []
+ bert_list = []
+ norm_text_list = []
+ for i in range(len(textlist)):
+ lang = langlist[i]
+ phones, word2ph, norm_text = self.clean_text_inf(textlist[i], lang, version)
+ bert = self.get_bert_inf(phones, word2ph, norm_text, lang)
+ phones_list.append(phones)
+ norm_text_list.append(norm_text)
+ bert_list.append(bert)
+ bert = torch.cat(bert_list, dim=1)
+ phones = sum(phones_list, [])
+ norm_text = "".join(norm_text_list)
if not final and len(phones) < 6:
return self.get_phones_and_bert("." + text, language, version, final=True)
@@ -240,4 +236,4 @@ class TextPreprocessor:
punctuations = "".join(re.escape(p) for p in punctuation)
pattern = f"([{punctuations}])([{punctuations}])+"
result = re.sub(pattern, r"\1", text)
- return result
+ return result
\ No newline at end of file
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 5c7d0103..643e4054 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -586,68 +586,67 @@ from text import chinese
def get_phones_and_bert(text, language, version, final=False):
- if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
- formattext = text
- while " " in formattext:
- formattext = formattext.replace(" ", " ")
- if language == "all_zh":
- if re.search(r"[A-Za-z]", formattext):
- formattext = re.sub(r"[a-z]", lambda x: x.group(0).upper(), formattext)
- formattext = chinese.mix_text_normalize(formattext)
- return get_phones_and_bert(formattext, "zh", version)
+ text = re.sub(r' {2,}', ' ', text)
+ textlist = []
+ langlist = []
+ if language == "all_zh":
+ for tmp in LangSegmenter.getTexts(text,"zh"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_yue":
+ for tmp in LangSegmenter.getTexts(text,"zh"):
+ if tmp["lang"] == "zh":
+ tmp["lang"] = "yue"
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_ja":
+ for tmp in LangSegmenter.getTexts(text,"ja"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_ko":
+ for tmp in LangSegmenter.getTexts(text,"ko"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "en":
+ langlist.append("en")
+ textlist.append(text)
+ elif language == "auto":
+ for tmp in LangSegmenter.getTexts(text):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "auto_yue":
+ for tmp in LangSegmenter.getTexts(text):
+ if tmp["lang"] == "zh":
+ tmp["lang"] = "yue"
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ else:
+ for tmp in LangSegmenter.getTexts(text):
+ if langlist:
+ if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
+ textlist[-1] += tmp["text"]
+ continue
+ if tmp["lang"] == "en":
+ langlist.append(tmp["lang"])
else:
- phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
- bert = get_bert_feature(norm_text, word2ph).to(device)
- elif language == "all_yue" and re.search(r"[A-Za-z]", formattext):
- formattext = re.sub(r"[a-z]", lambda x: x.group(0).upper(), formattext)
- formattext = chinese.mix_text_normalize(formattext)
- return get_phones_and_bert(formattext, "yue", version)
- else:
- phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
- bert = torch.zeros(
- (1024, len(phones)),
- dtype=torch.float16 if is_half == True else torch.float32,
- ).to(device)
- elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
- textlist = []
- langlist = []
- if language == "auto":
- for tmp in LangSegmenter.getTexts(text):
- langlist.append(tmp["lang"])
- textlist.append(tmp["text"])
- elif language == "auto_yue":
- for tmp in LangSegmenter.getTexts(text):
- if tmp["lang"] == "zh":
- tmp["lang"] = "yue"
- langlist.append(tmp["lang"])
- textlist.append(tmp["text"])
- else:
- for tmp in LangSegmenter.getTexts(text):
- if langlist:
- if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
- textlist[-1] += tmp["text"]
- continue
- if tmp["lang"] == "en":
- langlist.append(tmp["lang"])
- else:
- # 因无法区别中日韩文汉字,以用户输入为准
- langlist.append(language)
- textlist.append(tmp["text"])
- print(textlist)
- print(langlist)
- phones_list = []
- bert_list = []
- norm_text_list = []
- for i in range(len(textlist)):
- lang = langlist[i]
- phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
- bert = get_bert_inf(phones, word2ph, norm_text, lang)
- phones_list.append(phones)
- norm_text_list.append(norm_text)
- bert_list.append(bert)
- bert = torch.cat(bert_list, dim=1)
- phones = sum(phones_list, [])
- norm_text = "".join(norm_text_list)
+ # 因无法区别中日韩文汉字,以用户输入为准
+ langlist.append(language)
+ textlist.append(tmp["text"])
+ print(textlist)
+ print(langlist)
+ phones_list = []
+ bert_list = []
+ norm_text_list = []
+ for i in range(len(textlist)):
+ lang = langlist[i]
+ phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
+ bert = get_bert_inf(phones, word2ph, norm_text, lang)
+ phones_list.append(phones)
+ norm_text_list.append(norm_text)
+ bert_list.append(bert)
+ bert = torch.cat(bert_list, dim=1)
+ phones = sum(phones_list, [])
+ norm_text = "".join(norm_text_list)
if not final and len(phones) < 6:
return get_phones_and_bert("." + text, language, version, final=True)
diff --git a/GPT_SoVITS/text/LangSegmenter/langsegmenter.py b/GPT_SoVITS/text/LangSegmenter/langsegmenter.py
index 0187ea69..99b3a422 100644
--- a/GPT_SoVITS/text/LangSegmenter/langsegmenter.py
+++ b/GPT_SoVITS/text/LangSegmenter/langsegmenter.py
@@ -3,44 +3,38 @@ import re
# jieba静音
import jieba
-
jieba.setLogLevel(logging.CRITICAL)
# 更改fast_langdetect大模型位置
from pathlib import Path
import fast_langdetect
-
-fast_langdetect.infer._default_detector = fast_langdetect.infer.LangDetector(
- fast_langdetect.infer.LangDetectConfig(
- cache_dir=Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"
- )
-)
+fast_langdetect.infer._default_detector = fast_langdetect.infer.LangDetector(fast_langdetect.infer.LangDetectConfig(cache_dir=Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"))
from split_lang import LangSplitter
def full_en(text):
- pattern = r"^(?=.*[A-Za-z])[A-Za-z0-9\s\u0020-\u007E\u2000-\u206F\u3000-\u303F\uFF00-\uFFEF]+$"
+ pattern = r'^(?=.*[A-Za-z])[A-Za-z0-9\s\u0020-\u007E\u2000-\u206F\u3000-\u303F\uFF00-\uFFEF]+$'
return bool(re.match(pattern, text))
def full_cjk(text):
# 来自wiki
cjk_ranges = [
- (0x4E00, 0x9FFF), # CJK Unified Ideographs
- (0x3400, 0x4DB5), # CJK Extension A
- (0x20000, 0x2A6DD), # CJK Extension B
- (0x2A700, 0x2B73F), # CJK Extension C
- (0x2B740, 0x2B81F), # CJK Extension D
- (0x2B820, 0x2CEAF), # CJK Extension E
- (0x2CEB0, 0x2EBEF), # CJK Extension F
- (0x30000, 0x3134A), # CJK Extension G
- (0x31350, 0x323AF), # CJK Extension H
- (0x2EBF0, 0x2EE5D), # CJK Extension H
+ (0x4E00, 0x9FFF), # CJK Unified Ideographs
+ (0x3400, 0x4DB5), # CJK Extension A
+ (0x20000, 0x2A6DD), # CJK Extension B
+ (0x2A700, 0x2B73F), # CJK Extension C
+ (0x2B740, 0x2B81F), # CJK Extension D
+ (0x2B820, 0x2CEAF), # CJK Extension E
+ (0x2CEB0, 0x2EBEF), # CJK Extension F
+ (0x30000, 0x3134A), # CJK Extension G
+ (0x31350, 0x323AF), # CJK Extension H
+ (0x2EBF0, 0x2EE5D), # CJK Extension H
]
- pattern = r"[0-9、-〜。!?.!?… /]+$"
+ pattern = r'[0-9、-〜。!?.!?… /]+$'
cjk_text = ""
for char in text:
@@ -51,7 +45,7 @@ def full_cjk(text):
return cjk_text
-def split_jako(tag_lang, item):
+def split_jako(tag_lang,item):
if tag_lang == "ja":
pattern = r"([\u3041-\u3096\u3099\u309A\u30A1-\u30FA\u30FC]+(?:[0-9、-〜。!?.!?… ]+[\u3041-\u3096\u3099\u309A\u30A1-\u30FA\u30FC]*)*)"
else:
@@ -59,118 +53,165 @@ def split_jako(tag_lang, item):
lang_list: list[dict] = []
tag = 0
- for match in re.finditer(pattern, item["text"]):
+ for match in re.finditer(pattern, item['text']):
if match.start() > tag:
- lang_list.append({"lang": item["lang"], "text": item["text"][tag : match.start()]})
+ lang_list.append({'lang':item['lang'],'text':item['text'][tag:match.start()]})
tag = match.end()
- lang_list.append({"lang": tag_lang, "text": item["text"][match.start() : match.end()]})
+ lang_list.append({'lang':tag_lang,'text':item['text'][match.start():match.end()]})
- if tag < len(item["text"]):
- lang_list.append({"lang": item["lang"], "text": item["text"][tag : len(item["text"])]})
+ if tag < len(item['text']):
+ lang_list.append({'lang':item['lang'],'text':item['text'][tag:len(item['text'])]})
return lang_list
def merge_lang(lang_list, item):
- if lang_list and item["lang"] == lang_list[-1]["lang"]:
- lang_list[-1]["text"] += item["text"]
+ if lang_list and item['lang'] == lang_list[-1]['lang']:
+ lang_list[-1]['text'] += item['text']
else:
lang_list.append(item)
return lang_list
-class LangSegmenter:
+class LangSegmenter():
# 默认过滤器, 基于gsv目前四种语言
DEFAULT_LANG_MAP = {
"zh": "zh",
"yue": "zh", # 粤语
"wuu": "zh", # 吴语
"zh-cn": "zh",
- "zh-tw": "x", # 繁体设置为x
+ "zh-tw": "x", # 繁体设置为x
"ko": "ko",
"ja": "ja",
"en": "en",
}
- def getTexts(text):
+ def getTexts(text,default_lang = ""):
lang_splitter = LangSplitter(lang_map=LangSegmenter.DEFAULT_LANG_MAP)
+ lang_splitter.merge_across_digit = False
substr = lang_splitter.split_by_lang(text=text)
lang_list: list[dict] = []
- for _, item in enumerate(substr):
- dict_item = {"lang": item.lang, "text": item.text}
+ have_num = False
- # 处理短英文被识别为其他语言的问题
- if full_en(dict_item["text"]):
- dict_item["lang"] = "en"
- lang_list = merge_lang(lang_list, dict_item)
+ for _, item in enumerate(substr):
+ dict_item = {'lang':item.lang,'text':item.text}
+
+ if dict_item['lang'] == 'digit':
+ if default_lang != "":
+ dict_item['lang'] = default_lang
+ else:
+ have_num = True
+ lang_list = merge_lang(lang_list,dict_item)
continue
- # 处理非日语夹日文的问题(不包含CJK)
- ja_list: list[dict] = []
- if dict_item["lang"] != "ja":
- ja_list = split_jako("ja", dict_item)
+ # 处理短英文被识别为其他语言的问题
+ if full_en(dict_item['text']):
+ dict_item['lang'] = 'en'
+ lang_list = merge_lang(lang_list,dict_item)
+ continue
- if not ja_list:
- ja_list.append(dict_item)
+ if default_lang != "":
+ dict_item['lang'] = default_lang
+ lang_list = merge_lang(lang_list,dict_item)
+ continue
+ else:
+ # 处理非日语夹日文的问题(不包含CJK)
+ ja_list: list[dict] = []
+ if dict_item['lang'] != 'ja':
+ ja_list = split_jako('ja',dict_item)
- # 处理非韩语夹韩语的问题(不包含CJK)
- ko_list: list[dict] = []
- temp_list: list[dict] = []
- for _, ko_item in enumerate(ja_list):
- if ko_item["lang"] != "ko":
- ko_list = split_jako("ko", ko_item)
+ if not ja_list:
+ ja_list.append(dict_item)
- if ko_list:
- temp_list.extend(ko_list)
- else:
- temp_list.append(ko_item)
+ # 处理非韩语夹韩语的问题(不包含CJK)
+ ko_list: list[dict] = []
+ temp_list: list[dict] = []
+ for _, ko_item in enumerate(ja_list):
+ if ko_item["lang"] != 'ko':
+ ko_list = split_jako('ko',ko_item)
- # 未存在非日韩文夹日韩文
- if len(temp_list) == 1:
- # 未知语言检查是否为CJK
- if dict_item["lang"] == "x":
- cjk_text = full_cjk(dict_item["text"])
- if cjk_text:
- dict_item = {"lang": "zh", "text": cjk_text}
- lang_list = merge_lang(lang_list, dict_item)
+ if ko_list:
+ temp_list.extend(ko_list)
else:
- lang_list = merge_lang(lang_list, dict_item)
- continue
- else:
- lang_list = merge_lang(lang_list, dict_item)
- continue
+ temp_list.append(ko_item)
- # 存在非日韩文夹日韩文
- for _, temp_item in enumerate(temp_list):
- # 未知语言检查是否为CJK
- if temp_item["lang"] == "x":
- cjk_text = full_cjk(dict_item["text"])
- if cjk_text:
- dict_item = {"lang": "zh", "text": cjk_text}
- lang_list = merge_lang(lang_list, dict_item)
+ # 未存在非日韩文夹日韩文
+ if len(temp_list) == 1:
+ # 未知语言检查是否为CJK
+ if dict_item['lang'] == 'x':
+ cjk_text = full_cjk(dict_item['text'])
+ if cjk_text:
+ dict_item = {'lang':'zh','text':cjk_text}
+ lang_list = merge_lang(lang_list,dict_item)
+ else:
+ lang_list = merge_lang(lang_list,dict_item)
+ continue
else:
- lang_list = merge_lang(lang_list, dict_item)
- else:
- lang_list = merge_lang(lang_list, temp_item)
+ lang_list = merge_lang(lang_list,dict_item)
+ continue
+ # 存在非日韩文夹日韩文
+ for _, temp_item in enumerate(temp_list):
+ # 未知语言检查是否为CJK
+ if temp_item['lang'] == 'x':
+ cjk_text = full_cjk(temp_item['text'])
+ if cjk_text:
+ lang_list = merge_lang(lang_list,{'lang':'zh','text':cjk_text})
+ else:
+ lang_list = merge_lang(lang_list,temp_item)
+ else:
+ lang_list = merge_lang(lang_list,temp_item)
+
+ # 有数字
+ if have_num:
+ temp_list = lang_list
+ lang_list = []
+ for i, temp_item in enumerate(temp_list):
+ if temp_item['lang'] == 'digit':
+ if default_lang:
+ temp_item['lang'] = default_lang
+ elif lang_list and i == len(temp_list) - 1:
+ temp_item['lang'] = lang_list[-1]['lang']
+ elif not lang_list and i < len(temp_list) - 1:
+ temp_item['lang'] = temp_list[1]['lang']
+ elif lang_list and i < len(temp_list) - 1:
+ if lang_list[-1]['lang'] == temp_list[i + 1]['lang']:
+ temp_item['lang'] = lang_list[-1]['lang']
+ elif lang_list[-1]['text'][-1] in [",",".","!","?",",","。","!","?"]:
+ temp_item['lang'] = temp_list[i + 1]['lang']
+ elif temp_list[i + 1]['text'][0] in [",",".","!","?",",","。","!","?"]:
+ temp_item['lang'] = lang_list[-1]['lang']
+ elif temp_item['text'][-1] in ["。","."]:
+ temp_item['lang'] = lang_list[-1]['lang']
+ elif len(lang_list[-1]['text']) >= len(temp_list[i + 1]['text']):
+ temp_item['lang'] = lang_list[-1]['lang']
+ else:
+ temp_item['lang'] = temp_list[i + 1]['lang']
+ else:
+ temp_item['lang'] = 'zh'
+
+ lang_list = merge_lang(lang_list,temp_item)
+
+
+ # 筛X
temp_list = lang_list
lang_list = []
for _, temp_item in enumerate(temp_list):
- if temp_item["lang"] == "x":
+ if temp_item['lang'] == 'x':
if lang_list:
- temp_item["lang"] = lang_list[-1]["lang"]
+ temp_item['lang'] = lang_list[-1]['lang']
elif len(temp_list) > 1:
- temp_item["lang"] = temp_list[1]["lang"]
+ temp_item['lang'] = temp_list[1]['lang']
else:
- temp_item["lang"] = "zh"
+ temp_item['lang'] = 'zh'
- lang_list = merge_lang(lang_list, temp_item)
+ lang_list = merge_lang(lang_list,temp_item)
return lang_list
-
+
if __name__ == "__main__":
text = "MyGO?,你也喜欢まいご吗?"
@@ -178,3 +219,7 @@ if __name__ == "__main__":
text = "ねえ、知ってる?最近、僕は天文学を勉強してるんだ。君の瞳が星空みたいにキラキラしてるからさ。"
print(LangSegmenter.getTexts(text))
+
+ text = "当时ThinkPad T60刚刚发布,一同推出的还有一款名为Advanced Dock的扩展坞配件。这款扩展坞通过连接T60底部的插槽,扩展出包括PCIe在内的一大堆接口,并且自带电源,让T60可以安装桌面显卡来提升性能。"
+ print(LangSegmenter.getTexts(text,"zh"))
+ print(LangSegmenter.getTexts(text))
\ No newline at end of file
diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py
index ce44215f..944c9cb7 100644
--- a/GPT_SoVITS/text/chinese.py
+++ b/GPT_SoVITS/text/chinese.py
@@ -181,20 +181,6 @@ def text_normalize(text):
return dest_text
-# 不排除英文的文本格式化
-def mix_text_normalize(text):
- # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
- tx = TextNormalizer()
- sentences = tx.normalize(text)
- dest_text = ""
- for sentence in sentences:
- dest_text += replace_punctuation_with_en(sentence)
-
- # 避免重复标点引起的参考泄露
- dest_text = replace_consecutive_punctuation(dest_text)
- return dest_text
-
-
if __name__ == "__main__":
text = "啊——但是《原神》是由,米哈\游自主,研发的一款全.新开放世界.冒险游戏"
text = "呣呣呣~就是…大人的鼹鼠党吧?"
diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py
index 612aa3a5..dcce0d96 100644
--- a/GPT_SoVITS/text/chinese2.py
+++ b/GPT_SoVITS/text/chinese2.py
@@ -326,20 +326,6 @@ def text_normalize(text):
return dest_text
-# 不排除英文的文本格式化
-def mix_text_normalize(text):
- # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
- tx = TextNormalizer()
- sentences = tx.normalize(text)
- dest_text = ""
- for sentence in sentences:
- dest_text += replace_punctuation_with_en(sentence)
-
- # 避免重复标点引起的参考泄露
- dest_text = replace_consecutive_punctuation(dest_text)
- return dest_text
-
-
if __name__ == "__main__":
text = "啊——但是《原神》是由,米哈\游自主,研发的一款全.新开放世界.冒险游戏"
text = "呣呣呣~就是…大人的鼹鼠党吧?"
diff --git a/GPT_SoVITS/text/zh_normalization/num.py b/GPT_SoVITS/text/zh_normalization/num.py
index c3af4d6a..14d602b0 100644
--- a/GPT_SoVITS/text/zh_normalization/num.py
+++ b/GPT_SoVITS/text/zh_normalization/num.py
@@ -256,6 +256,24 @@ def replace_to_range(match) -> str:
return result
+RE_VERSION_NUM = re.compile(r"((\d+)(\.\d+)(\.\d+)?(\.\d+)+)")
+def replace_vrsion_num(match) -> str:
+ """
+ Args:
+ match (re.Match)
+ Returns:
+ str
+ """
+ result = ""
+ for c in match.group(1):
+ if c == ".":
+ result += "点"
+ else:
+ result += num2str(c)
+ return result
+
+
+
def _get_value(value_string: str, use_zero: bool = True) -> List[str]:
stripped = value_string.lstrip("0")
if len(stripped) == 0:
@@ -308,7 +326,11 @@ def num2str(value_string: str) -> str:
result = verbalize_cardinal(integer)
- decimal = decimal.rstrip("0")
+ if decimal.endswith("0"):
+ decimal = decimal.rstrip("0") + "0"
+ else:
+ decimal = decimal.rstrip("0")
+
if decimal:
# '.22' is verbalized as '零点二二'
# '3.20' is verbalized as '三点二
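The new `RE_VERSION_NUM` rule reads version-like strings digit by digit instead of treating them as decimals. Below is a standalone sketch of the idea with a simplified digit table; the module itself reuses its `num2str` helper, and its function name is spelled `replace_vrsion_num` as added above.

```python
import re

RE_VERSION_NUM = re.compile(r"((\d+)(\.\d+)(\.\d+)?(\.\d+)+)")  # matches e.g. 1.2.3 or 1.2.3.4

# Simplified digit table for illustration only.
DIGITS = dict(zip("0123456789", "零一二三四五六七八九"))

def replace_version_num(match: re.Match) -> str:
    # Verbalize character by character: dots become 点, digits are read out one at a time.
    return "".join("点" if c == "." else DIGITS[c] for c in match.group(1))

print(RE_VERSION_NUM.sub(replace_version_num, "升级到 2.0.1.3 版本"))  # 升级到 二点零点一点三 版本
```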
diff --git a/GPT_SoVITS/text/zh_normalization/text_normlization.py b/GPT_SoVITS/text/zh_normalization/text_normlization.py
index 099b01bd..1a6cdd0f 100644
--- a/GPT_SoVITS/text/zh_normalization/text_normlization.py
+++ b/GPT_SoVITS/text/zh_normalization/text_normlization.py
@@ -25,6 +25,7 @@ from .chronology import replace_time
from .constants import F2H_ASCII_LETTERS
from .constants import F2H_DIGITS
from .constants import F2H_SPACE
+from .num import RE_VERSION_NUM
from .num import RE_DECIMAL_NUM
from .num import RE_DEFAULT_NUM
from .num import RE_FRAC
@@ -36,6 +37,7 @@ from .num import RE_RANGE
from .num import RE_TO_RANGE
from .num import RE_ASMD
from .num import RE_POWER
+from .num import replace_vrsion_num
from .num import replace_default_num
from .num import replace_frac
from .num import replace_negative_num
@@ -158,6 +160,7 @@ class TextNormalizer:
sentence = RE_RANGE.sub(replace_range, sentence)
sentence = RE_INTEGER.sub(replace_negative_num, sentence)
+ sentence = RE_VERSION_NUM.sub(replace_vrsion_num, sentence)
sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, sentence)
sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence)
diff --git a/api.py b/api.py
index dc2e8826..cc0896a2 100644
--- a/api.py
+++ b/api.py
@@ -543,66 +543,65 @@ from text import chinese
def get_phones_and_bert(text, language, version, final=False):
- if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
- formattext = text
- while " " in formattext:
- formattext = formattext.replace(" ", " ")
- if language == "all_zh":
- if re.search(r"[A-Za-z]", formattext):
- formattext = re.sub(r"[a-z]", lambda x: x.group(0).upper(), formattext)
- formattext = chinese.mix_text_normalize(formattext)
- return get_phones_and_bert(formattext, "zh", version)
+ text = re.sub(r' {2,}', ' ', text)
+ textlist = []
+ langlist = []
+ if language == "all_zh":
+ for tmp in LangSegmenter.getTexts(text,"zh"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_yue":
+ for tmp in LangSegmenter.getTexts(text,"zh"):
+ if tmp["lang"] == "zh":
+ tmp["lang"] = "yue"
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_ja":
+ for tmp in LangSegmenter.getTexts(text,"ja"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "all_ko":
+ for tmp in LangSegmenter.getTexts(text,"ko"):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "en":
+ langlist.append("en")
+ textlist.append(text)
+ elif language == "auto":
+ for tmp in LangSegmenter.getTexts(text):
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ elif language == "auto_yue":
+ for tmp in LangSegmenter.getTexts(text):
+ if tmp["lang"] == "zh":
+ tmp["lang"] = "yue"
+ langlist.append(tmp["lang"])
+ textlist.append(tmp["text"])
+ else:
+ for tmp in LangSegmenter.getTexts(text):
+ if langlist:
+ if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
+ textlist[-1] += tmp["text"]
+ continue
+ if tmp["lang"] == "en":
+ langlist.append(tmp["lang"])
else:
- phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
- bert = get_bert_feature(norm_text, word2ph).to(device)
- elif language == "all_yue" and re.search(r"[A-Za-z]", formattext):
- formattext = re.sub(r"[a-z]", lambda x: x.group(0).upper(), formattext)
- formattext = chinese.mix_text_normalize(formattext)
- return get_phones_and_bert(formattext, "yue", version)
- else:
- phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
- bert = torch.zeros(
- (1024, len(phones)),
- dtype=torch.float16 if is_half == True else torch.float32,
- ).to(device)
- elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
- textlist = []
- langlist = []
- if language == "auto":
- for tmp in LangSegmenter.getTexts(text):
- langlist.append(tmp["lang"])
- textlist.append(tmp["text"])
- elif language == "auto_yue":
- for tmp in LangSegmenter.getTexts(text):
- if tmp["lang"] == "zh":
- tmp["lang"] = "yue"
- langlist.append(tmp["lang"])
- textlist.append(tmp["text"])
- else:
- for tmp in LangSegmenter.getTexts(text):
- if langlist:
- if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
- textlist[-1] += tmp["text"]
- continue
- if tmp["lang"] == "en":
- langlist.append(tmp["lang"])
- else:
- # 因无法区别中日韩文汉字,以用户输入为准
- langlist.append(language)
- textlist.append(tmp["text"])
- phones_list = []
- bert_list = []
- norm_text_list = []
- for i in range(len(textlist)):
- lang = langlist[i]
- phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
- bert = get_bert_inf(phones, word2ph, norm_text, lang)
- phones_list.append(phones)
- norm_text_list.append(norm_text)
- bert_list.append(bert)
- bert = torch.cat(bert_list, dim=1)
- phones = sum(phones_list, [])
- norm_text = "".join(norm_text_list)
+ # 因无法区别中日韩文汉字,以用户输入为准
+ langlist.append(language)
+ textlist.append(tmp["text"])
+ phones_list = []
+ bert_list = []
+ norm_text_list = []
+ for i in range(len(textlist)):
+ lang = langlist[i]
+ phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
+ bert = get_bert_inf(phones, word2ph, norm_text, lang)
+ phones_list.append(phones)
+ norm_text_list.append(norm_text)
+ bert_list.append(bert)
+ bert = torch.cat(bert_list, dim=1)
+ phones = sum(phones_list, [])
+ norm_text = "".join(norm_text_list)
if not final and len(phones) < 6:
return get_phones_and_bert("." + text, language, version, final=True)
From 8c579d46dda62ae5bdf04f96796c1acc79d44b59 Mon Sep 17 00:00:00 2001
From: Yixiao Chen <113320753+Bobchenyx@users.noreply.github.com>
Date: Wed, 2 Jul 2025 10:48:28 -0400
Subject: [PATCH 08/27] Update export_torch_script.py (#2494)
Avoid dtype inconsistency when exporting
---
GPT_SoVITS/export_torch_script.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/GPT_SoVITS/export_torch_script.py b/GPT_SoVITS/export_torch_script.py
index a3a48277..e4406f28 100644
--- a/GPT_SoVITS/export_torch_script.py
+++ b/GPT_SoVITS/export_torch_script.py
@@ -474,6 +474,10 @@ class T2SModel(nn.Module):
bert = bert.unsqueeze(0)
x = self.ar_text_embedding(all_phoneme_ids)
+
+ # avoid dtype inconsistency when exporting
+ bert = bert.to(dtype=self.bert_proj.weight.dtype)
+
x = x + self.bert_proj(bert.transpose(1, 2))
x: torch.Tensor = self.ar_text_position(x)
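The added cast aligns the BERT features with the projection layer's weight dtype before the matmul, which is what trips up TorchScript export when the two disagree. A minimal reproduction under assumed dimensions (1024-dim features, 512-dim projection; the real sizes come from the model config):

```python
import torch
import torch.nn as nn

bert_proj = nn.Linear(1024, 512)                       # exported module weights (fp32 here)
bert = torch.randn(1, 1024, 13, dtype=torch.float16)   # features produced under half precision

# Mixing fp16 inputs with fp32 weights raises a dtype mismatch; casting to the
# weight dtype, as the patch does, keeps the traced graph consistent.
bert = bert.to(dtype=bert_proj.weight.dtype)
x = bert_proj(bert.transpose(1, 2))                    # shape (1, 13, 512), dtype fp32
print(x.dtype)
```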
From 0899b7e432561c6db3b33b2556fbb0bde25f2725 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 17:59:49 +0800
Subject: [PATCH 09/27] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index e67288ea..6bcb2844 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
From 3a75f5023feea209b256c2943cfb42418313ba28 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 18:05:03 +0800
Subject: [PATCH 10/27] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 6bcb2844..6cb19092 100644
--- a/README.md
+++ b/README.md
@@ -10,13 +10,13 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[](https://lj1995-gpt-sovits-proplus.hf.space/)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
[](https://rentry.co/GPT-SoVITS-guide#/)
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/en/Changelog_EN.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
**English** | [**中文简体**](./docs/cn/README.md) | [**日本語**](./docs/ja/README.md) | [**한국어**](./docs/ko/README.md) | [**Türkçe**](./docs/tr/README.md)
From 97e37c74d8dc8704bc8192caf61cea67156af1dc Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 18:06:04 +0800
Subject: [PATCH 11/27] Update README.md
---
docs/cn/README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/cn/README.md b/docs/cn/README.md
index 77841b99..39e37a42 100644
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@@ -8,13 +8,13 @@
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
[](https://rentry.co/GPT-SoVITS-guide#/)
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/cn/Changelog_CN.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
[**English**](../../README.md) | **中文简体** | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
From e63e0901fd62a8ce471507828fa281e895f32b7a Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 18:12:24 +0800
Subject: [PATCH 12/27] Update assets.py
---
tools/assets.py | 89 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 67 insertions(+), 22 deletions(-)
diff --git a/tools/assets.py b/tools/assets.py
index 28d0f6b4..533a020c 100644
--- a/tools/assets.py
+++ b/tools/assets.py
@@ -1,38 +1,81 @@
js = """
-function deleteTheme() {
+function createGradioAnimation() {
-const params = new URLSearchParams(window.location.search);
-if (params.has('__theme')) {
- params.delete('__theme');
- const newUrl = `${window.location.pathname}?${params.toString()}`;
- window.location.replace(newUrl);
-}
+ const params = new URLSearchParams(window.location.search);
+ if (params.get('__theme') !== 'light') {
+ params.set('__theme', 'light'); // 仅当 __theme 不是 'light' 时设置为 'light'
+ window.location.search = params.toString(); // 更新 URL,触发页面刷新
+ }
+
+ var container = document.createElement('div');
+ container.id = 'gradio-animation';
+ container.style.fontSize = '2em';
+ container.style.fontWeight = '500';
+ container.style.textAlign = 'center';
+ container.style.marginBottom = '20px';
+ container.style.fontFamily = '-apple-system, sans-serif, Arial, Calibri';
+ var text = 'Welcome to GPT-SoVITS !';
+ for (var i = 0; i < text.length; i++) {
+ (function(i){
+ setTimeout(function(){
+ var letter = document.createElement('span');
+ letter.style.opacity = '0';
+ letter.style.transition = 'opacity 0.5s';
+ letter.innerText = text[i];
+
+ container.appendChild(letter);
+
+ setTimeout(function() {
+ letter.style.opacity = '1';
+ }, 50);
+ }, i * 250);
+ })(i);
+ }
+ return 'Animation created';
}
"""
+
css = """
/* CSSStyleRule */
+
.markdown {
+ background-color: lightblue;
padding: 6px 10px;
}
-@media (prefers-color-scheme: light) {
- .markdown {
- background-color: lightblue;
- color: #000;
- }
-}
-
-@media (prefers-color-scheme: dark) {
- .markdown {
- background-color: #4b4b4b;
- color: rgb(244, 244, 245);
- }
+.checkbox_info {
+ color: var(--block-title-text-color) !important;
+ font-size: var(--block-title-text-size) !important;
+ font-weight: var(--block-title-text-weight) !important;
+ height: 22px;
+ margin-bottom: 8px !important;
}
::selection {
- background: #ffc078 !important;
+ background: #ffc078; !important;
+}
+
+#checkbox_train_dpo input[type="checkbox"]{
+ margin-top: 6px;
+}
+
+#checkbox_train_dpo span {
+ margin-top: 6px;
+}
+
+#checkbox_align_train {
+ padding-top: 18px;
+ padding-bottom: 18px;
+}
+
+#checkbox_align_infer input[type="checkbox"] {
+ margin-top: 10px;
+}
+
+#checkbox_align_infer span {
+ margin-top: 10px;
}
footer {
@@ -48,14 +91,16 @@ footer * {
}
"""
-
top_html = """
{}
-
+
+
+
+
From 4e3c69043cf0772f02125a63e70af835ca702904 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 18:16:24 +0800
Subject: [PATCH 13/27] Update inference_webui.py
---
GPT_SoVITS/inference_webui.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 643e4054..a361ed58 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -6,7 +6,20 @@
全部按英文识别
全部按日文识别
"""
+import psutil
+import os
+def set_high_priority():
+ """把当前 Python 进程设为 HIGH_PRIORITY_CLASS"""
+ if os.name != "nt":
+ return # 仅 Windows 有效
+ p = psutil.Process(os.getpid())
+ try:
+ p.nice(psutil.HIGH_PRIORITY_CLASS)
+ print("已将进程优先级设为 High")
+ except psutil.AccessDenied:
+ print("权限不足,无法修改优先级(请用管理员运行)")
+set_high_priority()
import json
import logging
import os
From 426e1a2bb43614af2479b877c37acfb0591e952f Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 18:16:45 +0800
Subject: [PATCH 14/27] Raise the inference process priority
---
GPT_SoVITS/inference_webui_fast.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py
index 470b7bbd..2c159d81 100644
--- a/GPT_SoVITS/inference_webui_fast.py
+++ b/GPT_SoVITS/inference_webui_fast.py
@@ -6,7 +6,20 @@
全部按英文识别
全部按日文识别
"""
+import psutil
+import os
+def set_high_priority():
+ """把当前 Python 进程设为 HIGH_PRIORITY_CLASS"""
+ if os.name != "nt":
+ return # 仅 Windows 有效
+ p = psutil.Process(os.getpid())
+ try:
+ p.nice(psutil.HIGH_PRIORITY_CLASS)
+ print("已将进程优先级设为 High")
+ except psutil.AccessDenied:
+ print("权限不足,无法修改优先级(请用管理员运行)")
+set_high_priority()
import json
import logging
import os
From fec515dcce2053164e6793a1c2ffe8fecc32d6ad Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 10 Jul 2025 18:33:18 +0800
Subject: [PATCH 15/27] Update Changelog_CN.md
---
docs/cn/Changelog_CN.md | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md
index 6c6dcdbe..f68e23a9 100644
--- a/docs/cn/Changelog_CN.md
+++ b/docs/cn/Changelog_CN.md
@@ -594,3 +594,33 @@
- 内容: 修复实验名结尾出现空格在win中路径不正确的问题
- 类型: 修复
- 提交: RVC-Boss
+- 2025.06.10 [Commit#746cb536](https://github.com/RVC-Boss/GPT-SoVITS/commit/746cb536c68b1fe6ce3ca7e882235375b8a8dd89)
+ - 内容: 语种分割优化
+ - 类型: 优化
+ - 提交: KamioRinn
+- 2025.06.11 [Commit#dd2b9253](https://github.com/RVC-Boss/GPT-SoVITS/commit/dd2b9253aabb09db32db7a3344570ed9df043351)
+ - 内容: 修复并行推理对v2pro支持bug
+ - 类型: 修复
+ - 提交: YYuX-1145
+- 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
+ - 内容: v2pro对ge提取时会出现数值溢出的问题修复
+ - 类型: 修复
+ - 提交: RVC-Boss
+- 2025.06.11 [Commit#37f5abfc](https://github.com/RVC-Boss/GPT-SoVITS/commit/6fdc67ca83418306f11e90b9139278313ac5c3e9)[Commit#6fdc67ca](https://github.com/RVC-Boss/GPT-SoVITS/commit/37f5abfcb4a6553652235909db2e124b6f8ff3a5)
+ - 内容: install.sh逻辑优化
+ - 类型: 优化
+ - 提交: XXXXRT666
+- 2025.06.27 [Commit#90ebefa7](https://github.com/RVC-Boss/GPT-SoVITS/commit/90ebefa78fd544da36eebe0b2003620879c921b0)
+ - 内容: onnxruntime加载逻辑优化(对gpu/cpu的判断)
+ - 类型: 优化
+ - 提交: KamioRinn
+- 2025.06.27 [Commit#6df61f58](https://github.com/RVC-Boss/GPT-SoVITS/commit/6df61f58e4d18d4c2ad9d1eddd6a1bd690034c23)
+ - 内容: 语言分割及格式化优化
+ - 类型: 优化
+ - 提交: KamioRinn
+- 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
+ - 内容: 提升推理进程优先级(修复win11下可能GPU利用率受限的问题)
+ - 类型: 修复
+ - 提交: XianYue0125
+
+
From ec1218893e09389b23e477bf685ced7e5ec2fb08 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:10:07 +0800
Subject: [PATCH 16/27] Update Badge (#2518)
* Update README.md
* Update README.md
* Update Badges
* specify ranges
---
README.md | 5 ++++-
docs/cn/README.md | 8 ++++++--
docs/ja/README.md | 10 +++++++---
docs/ko/README.md | 10 +++++++---
docs/tr/README.md | 9 ++++++---
5 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 6cb19092..be807a08 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,11 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.
+[](https://www.python.org)
+[](https://github.com/RVC-Boss/gpt-sovits/releases)
+
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://lj1995-gpt-sovits-proplus.hf.space/)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
diff --git a/docs/cn/README.md b/docs/cn/README.md
index 39e37a42..793734d8 100644
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@@ -7,15 +7,19 @@

+[](https://www.python.org)
+[](https://github.com/RVC-Boss/gpt-sovits/releases)
+
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://lj1995-gpt-sovits-proplus.hf.space/)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
[](https://rentry.co/GPT-SoVITS-guide#/)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/cn/Changelog_CN.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/en/Changelog_EN.md)
[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
+
[**English**](../../README.md) | **中文简体** | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
diff --git a/docs/ja/README.md b/docs/ja/README.md
index c3c42cd8..1ee9abb4 100644
--- a/docs/ja/README.md
+++ b/docs/ja/README.md
@@ -7,14 +7,18 @@

+[](https://www.python.org)
+[](https://github.com/RVC-Boss/gpt-sovits/releases)
+
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
[](https://rentry.co/GPT-SoVITS-guide#/)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/ja/Changelog_JA.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/en/Changelog_EN.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
+
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | **日本語** | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
diff --git a/docs/ko/README.md b/docs/ko/README.md
index 1028c00a..9ff32f83 100644
--- a/docs/ko/README.md
+++ b/docs/ko/README.md
@@ -7,14 +7,18 @@

+[](https://www.python.org)
+[](https://github.com/RVC-Boss/gpt-sovits/releases)
+
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
[](https://rentry.co/GPT-SoVITS-guide#/)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/ko/Changelog_KO.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/en/Changelog_EN.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
+
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | **한국어** | [**Türkçe**](../tr/README.md)
diff --git a/docs/tr/README.md b/docs/tr/README.md
index dd5d79b3..b80c764b 100644
--- a/docs/tr/README.md
+++ b/docs/tr/README.md
@@ -7,14 +7,17 @@ Güçlü Birkaç Örnekli Ses Dönüştürme ve Metinden Konuşmaya Web Arayüz

+[](https://www.python.org)
+[](https://github.com/RVC-Boss/gpt-sovits/releases)
+
[](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/Colab-WebUI.ipynb)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
+[](https://lj1995-gpt-sovits-proplus.hf.space/)
[](https://hub.docker.com/r/xxxxrt666/gpt-sovits)
[](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e)
[](https://rentry.co/GPT-SoVITS-guide#/)
-[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/tr/Changelog_TR.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/docs/en/Changelog_EN.md)
+[](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | **Türkçe**
From 706bec74f8b80f9ca915b746a960bd72c9a1c4f9 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:11:08 +0800
Subject: [PATCH 17/27] Update assets.py
---
tools/assets.py | 90 +++++++++++++------------------------------------
1 file changed, 24 insertions(+), 66 deletions(-)
diff --git a/tools/assets.py b/tools/assets.py
index 533a020c..6851c064 100644
--- a/tools/assets.py
+++ b/tools/assets.py
@@ -1,81 +1,38 @@
js = """
-function createGradioAnimation() {
-
- const params = new URLSearchParams(window.location.search);
- if (params.get('__theme') !== 'light') {
- params.set('__theme', 'light'); // 仅当 __theme 不是 'light' 时设置为 'light'
- window.location.search = params.toString(); // 更新 URL,触发页面刷新
- }
-
- var container = document.createElement('div');
- container.id = 'gradio-animation';
- container.style.fontSize = '2em';
- container.style.fontWeight = '500';
- container.style.textAlign = 'center';
- container.style.marginBottom = '20px';
- container.style.fontFamily = '-apple-system, sans-serif, Arial, Calibri';
+function deleteTheme() {
- var text = 'Welcome to GPT-SoVITS !';
- for (var i = 0; i < text.length; i++) {
- (function(i){
- setTimeout(function(){
- var letter = document.createElement('span');
- letter.style.opacity = '0';
- letter.style.transition = 'opacity 0.5s';
- letter.innerText = text[i];
+const params = new URLSearchParams(window.location.search);
+if (params.has('__theme')) {
+ params.delete('__theme');
+ const newUrl = `${window.location.pathname}?${params.toString()}`;
+ window.location.replace(newUrl);
+}
- container.appendChild(letter);
-
- setTimeout(function() {
- letter.style.opacity = '1';
- }, 50);
- }, i * 250);
- })(i);
- }
- return 'Animation created';
}
"""
-
css = """
/* CSSStyleRule */
-
.markdown {
- background-color: lightblue;
padding: 6px 10px;
}
-.checkbox_info {
- color: var(--block-title-text-color) !important;
- font-size: var(--block-title-text-size) !important;
- font-weight: var(--block-title-text-weight) !important;
- height: 22px;
- margin-bottom: 8px !important;
+@media (prefers-color-scheme: light) {
+ .markdown {
+ background-color: lightblue;
+ color: #000;
+ }
+}
+
+@media (prefers-color-scheme: dark) {
+ .markdown {
+ background-color: #4b4b4b;
+ color: rgb(244, 244, 245);
+ }
}
::selection {
- background: #ffc078; !important;
-}
-
-#checkbox_train_dpo input[type="checkbox"]{
- margin-top: 6px;
-}
-
-#checkbox_train_dpo span {
- margin-top: 6px;
-}
-
-#checkbox_align_train {
- padding-top: 18px;
- padding-bottom: 18px;
-}
-
-#checkbox_align_infer input[type="checkbox"] {
- margin-top: 10px;
-}
-
-#checkbox_align_infer span {
- margin-top: 10px;
+ background: #ffc078 !important;
}
footer {
@@ -91,6 +48,7 @@ footer * {
}
"""
+
top_html = """
{}
@@ -98,12 +56,12 @@ top_html = """
-
-
-
+
+
+
From 85035f7ac09b8b90146cc4cd6bf334673e9baac7 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Mon, 14 Jul 2025 18:56:22 +0800
Subject: [PATCH 18/27] add RTF performance figures
add RTF performance figures
---
README.md | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/README.md b/README.md
index be807a08..f9feb80d 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,11 @@ Unseen speakers few-shot fine-tuning demo:
https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
+**RTF(inference speed) of GPT-SoVITS**:
+0.028 tested in 4060Ti, 0.014 tested in 4090. You can test our [huggingface demo](https://lj1995-gpt-sovits-proplus.hf.space/) (half H200) to experience high-speed inference .
+
+请不要尬黑GPT-SoVITS推理速度慢,谢谢!
+
**User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
## Installation
From 42586e20f7e2d58026ed2fa68ac1ddf41fc48346 Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Mon, 14 Jul 2025 19:01:26 +0800
Subject: [PATCH 19/27] add RTF performance figures
add RTF performance figures
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index f9feb80d..86d50ea2 100644
--- a/README.md
+++ b/README.md
@@ -43,8 +43,8 @@ Unseen speakers few-shot fine-tuning demo:
https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
-**RTF(inference speed) of GPT-SoVITS**:
-0.028 tested in 4060Ti, 0.014 tested in 4090. You can test our [huggingface demo](https://lj1995-gpt-sovits-proplus.hf.space/) (half H200) to experience high-speed inference .
+**RTF(inference speed) of GPT-SoVITS v2 ProPlus**:
+0.028 tested in 4060Ti, 0.014 tested in 4090 (1400words~=4min, inference time is 3.36s), 0.526 in M4 CPU. You can test our [huggingface demo](https://lj1995-gpt-sovits-proplus.hf.space/) (half H200) to experience high-speed inference .
请不要尬黑GPT-SoVITS推理速度慢,谢谢!
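As a quick sanity check of the figures quoted above (a standalone sketch, not repository code): RTF is the ratio of synthesis time to audio duration, so 3.36 s of inference for roughly 4 minutes of speech works out to about 0.014.

audio_duration_s = 4 * 60   # ~1400 words of speech is roughly 4 minutes
inference_time_s = 3.36     # RTX 4090 figure quoted in the README text
rtf = inference_time_s / audio_duration_s
print(f"RTF = {rtf:.3f}")   # 0.014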
From e476b01f30312139555d45a78cbd830f557d892c Mon Sep 17 00:00:00 2001
From: jiangsier-xyz <126842484+jiangsier-xyz@users.noreply.github.com>
Date: Wed, 16 Jul 2025 15:42:36 +0800
Subject: [PATCH 20/27] Fix TTS.py failing to recognize the actually supported versions v2Pro and v2ProPlus (#2490)
Also updates the default configuration.
Co-authored-by: jiangsier-xyz
---
GPT_SoVITS/TTS_infer_pack/TTS.py | 2 +-
GPT_SoVITS/configs/tts_infer.yaml | 16 ++++++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
index 795b55dd..98c10328 100644
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -304,7 +304,7 @@ class TTS_Config:
configs: dict = self._load_configs(self.configs_path)
assert isinstance(configs, dict)
- version = configs.get("version", "v2").lower()
+ version = configs.get("version", "v2")
assert version in ["v1", "v2", "v3", "v4", "v2Pro", "v2ProPlus"]
self.default_configs[version] = configs.get(version, self.default_configs[version])
self.configs: dict = configs.get("custom", deepcopy(self.default_configs[version]))
diff --git a/GPT_SoVITS/configs/tts_infer.yaml b/GPT_SoVITS/configs/tts_infer.yaml
index 20c41a20..f31061cc 100644
--- a/GPT_SoVITS/configs/tts_infer.yaml
+++ b/GPT_SoVITS/configs/tts_infer.yaml
@@ -22,6 +22,22 @@ v2:
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
version: v2
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
+v2Pro:
+ bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
+ cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
+ device: cpu
+ is_half: false
+ t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
+ version: v2Pro
+ vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2Pro.pth
+v2ProPlus:
+ bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
+ cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
+ device: cpu
+ is_half: false
+ t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
+ version: v2ProPlus
+ vits_weights_path: GPT_SoVITS/pretrained_models/v2Pro/s2Gv2ProPlus.pth
v3:
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
From 4d8ebf85233d4f1166d7cc02fdc595602975ca8f Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Wed, 16 Jul 2025 15:43:26 +0800
Subject: [PATCH 21/27] Update TTS.py
---
GPT_SoVITS/TTS_infer_pack/TTS.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
index 98c10328..da6a6dfe 100644
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -304,8 +304,8 @@ class TTS_Config:
configs: dict = self._load_configs(self.configs_path)
assert isinstance(configs, dict)
- version = configs.get("version", "v2")
- assert version in ["v1", "v2", "v3", "v4", "v2Pro", "v2ProPlus"]
+ version = configs.get("version", "v2").lower()
+ assert version in ["v1", "v2", "v3", "v4", "v2pro", "v2proplus"]
self.default_configs[version] = configs.get(version, self.default_configs[version])
self.configs: dict = configs.get("custom", deepcopy(self.default_configs[version]))
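The change above lower-cases the version string read from the config, so the accepted list must be lower-case as well. Below is a standalone sketch of the same case-insensitive matching (illustrative only; the CANONICAL table simply maps back to the mixed-case section names used in tts_infer.yaml).

CANONICAL = {
    "v1": "v1", "v2": "v2", "v3": "v3", "v4": "v4",
    "v2pro": "v2Pro", "v2proplus": "v2ProPlus",
}

def normalize_version(raw: str, default: str = "v2") -> str:
    key = (raw or default).lower()          # accept any casing from the YAML
    if key not in CANONICAL:
        raise ValueError(f"Invalid version: {raw!r}")
    return CANONICAL[key]                   # canonical, mixed-case section name

print(normalize_version("V2PROPLUS"))       # v2ProPlus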
From 2d09bbe63ab6e33656e7d44b76a96e0072ae994b Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Wed, 16 Jul 2025 15:44:04 +0800
Subject: [PATCH 22/27] Update tts_infer.yaml
---
GPT_SoVITS/configs/tts_infer.yaml | 1 +
1 file changed, 1 insertion(+)
diff --git a/GPT_SoVITS/configs/tts_infer.yaml b/GPT_SoVITS/configs/tts_infer.yaml
index f31061cc..531aeb51 100644
--- a/GPT_SoVITS/configs/tts_infer.yaml
+++ b/GPT_SoVITS/configs/tts_infer.yaml
@@ -1,3 +1,4 @@
+version: v2ProPlus
custom:
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
From cefafee32cfc08f0f622ef460578b09485cc189e Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Thu, 17 Jul 2025 20:28:25 +0800
Subject: [PATCH 23/27] Add Distil (#2531)
---
tools/asr/config.py | 24 +++++++----
tools/asr/fasterwhisper_asr.py | 78 +++++++++++++++++++++++++---------
webui.py | 9 ++--
3 files changed, 77 insertions(+), 34 deletions(-)
diff --git a/tools/asr/config.py b/tools/asr/config.py
index c04069b2..9c26a4f6 100644
--- a/tools/asr/config.py
+++ b/tools/asr/config.py
@@ -6,15 +6,10 @@ def check_fw_local_models():
启动时检查本地是否有 Faster Whisper 模型.
"""
model_size_list = [
- "tiny",
- "tiny.en",
- "base",
- "base.en",
- "small",
- "small.en",
"medium",
"medium.en",
- "large",
+ "distil-large-v2",
+ "distil-large-v3",
"large-v1",
"large-v2",
"large-v3",
@@ -25,11 +20,24 @@ def check_fw_local_models():
return model_size_list
+def get_models():
+ model_size_list = [
+ "medium",
+ "medium.en",
+ "distil-large-v2",
+ "distil-large-v3",
+ "large-v1",
+ "large-v2",
+ "large-v3",
+ ]
+ return model_size_list
+
+
asr_dict = {
"达摩 ASR (中文)": {"lang": ["zh", "yue"], "size": ["large"], "path": "funasr_asr.py", "precision": ["float32"]},
"Faster Whisper (多语种)": {
"lang": ["auto", "zh", "en", "ja", "ko", "yue"],
- "size": check_fw_local_models(),
+ "size": get_models(),
"path": "fasterwhisper_asr.py",
"precision": ["float32", "float16", "int8"],
},
diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py
index 27cabbc2..a2ebe975 100644
--- a/tools/asr/fasterwhisper_asr.py
+++ b/tools/asr/fasterwhisper_asr.py
@@ -1,15 +1,16 @@
import argparse
import os
+import time
import traceback
-os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
-os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
-
import torch
from faster_whisper import WhisperModel
+from huggingface_hub import snapshot_download
+from huggingface_hub.errors import LocalEntryNotFoundError
from tqdm import tqdm
-from tools.asr.config import check_fw_local_models
+from tools.asr.config import get_models
+from tools.asr.funasr_asr import only_asr
from tools.my_utils import load_cudnn
# fmt: off
@@ -38,20 +39,54 @@ language_code_list = [
# fmt: on
-def execute_asr(input_folder, output_folder, model_size, language, precision):
- if "-local" in model_size:
- model_size = model_size[:-6]
- model_path = f"tools/asr/models/faster-whisper-{model_size}"
+def download_model(model_size: str):
+ if "distil" in model_size:
+ repo_id = "Systran/faster-{}-whisper-{}".format(*model_size.split("-", maxsplit=1))
else:
- model_path = model_size
+ repo_id = f"Systran/faster-whisper-{model_size}"
+ model_path = f"tools/asr/models/{repo_id.strip('Systran/')}"
+
+ files: list[str] = [
+ "config.json",
+ "model.bin",
+ "tokenizer.json",
+ "vocabulary.txt",
+ ]
+ if model_size == "large-v3" or "distil" in model_size:
+ files.append("preprocessor_config.json")
+ files.append("vocabulary.json")
+
+ files.remove("vocabulary.txt")
+
+ for attempt in range(2):
+ try:
+ snapshot_download(
+ repo_id=repo_id,
+ allow_patterns=files,
+ local_dir=model_path,
+ )
+ break
+ except LocalEntryNotFoundError:
+ if attempt < 1:
+ time.sleep(2)
+ else:
+ print("[ERROR] LocalEntryNotFoundError and no fallback.")
+ traceback.print_exc()
+ exit(1)
+ except Exception as e:
+ print(f"[ERROR] Unexpected error on attempt {attempt + 1}: {e}")
+ traceback.print_exc()
+ exit(1)
+
+ return model_path
+
+
+def execute_asr(input_folder, output_folder, model_path, language, precision):
if language == "auto":
language = None # 不设置语种由模型自动输出概率最高的语种
- print("loading faster whisper model:", model_size, model_path)
+ print("loading faster whisper model:", model_path, model_path)
device = "cuda" if torch.cuda.is_available() else "cpu"
- try:
- model = WhisperModel(model_path, device=device, compute_type=precision)
- except:
- return print(traceback.format_exc())
+ model = WhisperModel(model_path, device=device, compute_type=precision)
input_file_names = os.listdir(input_folder)
input_file_names.sort()
@@ -73,16 +108,15 @@ def execute_asr(input_folder, output_folder, model_size, language, precision):
if info.language == "zh":
print("检测为中文文本, 转 FunASR 处理")
- if "only_asr" not in globals():
- from tools.asr.funasr_asr import only_asr # 如果用英文就不需要导入下载模型
text = only_asr(file_path, language=info.language.lower())
if text == "":
for segment in segments:
text += segment.text
output.append(f"{file_path}|{output_file_name}|{info.language.upper()}|{text}")
- except:
- print(traceback.format_exc())
+ except Exception as e:
+ print(e)
+ traceback.print_exc()
output_folder = output_folder or "output/asr_opt"
os.makedirs(output_folder, exist_ok=True)
@@ -107,7 +141,7 @@ if __name__ == "__main__":
"--model_size",
type=str,
default="large-v3",
- choices=check_fw_local_models(),
+ choices=get_models(),
help="Model Size of Faster Whisper",
)
parser.add_argument(
@@ -123,10 +157,14 @@ if __name__ == "__main__":
)
cmd = parser.parse_args()
+ model_size = cmd.model_size
+ if model_size == "large":
+ model_size = "large-v3"
+ model_path = download_model(model_size)
output_file_path = execute_asr(
input_folder=cmd.input_folder,
output_folder=cmd.output_folder,
- model_size=cmd.model_size,
+ model_path=model_path,
language=cmd.language,
precision=cmd.precision,
)
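The repo-id construction in the new download_model helper is compact; the standalone sketch below (illustrative only) spells out how a distil size such as "distil-large-v3" maps onto the Systran repository naming scheme used by the patch.

def repo_id_for(model_size: str) -> str:
    # "distil-large-v3" -> ("distil", "large-v3") -> Systran/faster-distil-whisper-large-v3
    if "distil" in model_size:
        prefix, rest = model_size.split("-", maxsplit=1)
        return f"Systran/faster-{prefix}-whisper-{rest}"
    # plain sizes keep the usual faster-whisper naming
    return f"Systran/faster-whisper-{model_size}"

print(repo_id_for("distil-large-v3"))  # Systran/faster-distil-whisper-large-v3
print(repo_id_for("large-v2"))         # Systran/faster-whisper-large-v2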
diff --git a/webui.py b/webui.py
index 9981cfcc..9a6aae5f 100644
--- a/webui.py
+++ b/webui.py
@@ -86,13 +86,10 @@ from config import (
from tools import my_utils
from tools.my_utils import check_details, check_for_existance
-# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
-try:
- import gradio.analytics as analytics
+os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
- analytics.version_check = lambda: None
-except:
- ...
+# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
import gradio as gr
n_cpu = cpu_count()
From b9211657d8dfe8cd46f6b6eb9cfc55d5989e6548 Mon Sep 17 00:00:00 2001
From: ChasonJiang <46401978+ChasonJiang@users.noreply.github.com>
Date: Fri, 18 Jul 2025 11:54:40 +0800
Subject: [PATCH 24/27] Optimize the TTS_Config code logic (#2536)
* Optimize the TTS_Config code logic
* Save tts_config after loading the VITS weights
---
GPT_SoVITS/TTS_infer_pack/TTS.py | 20 +++++++++++++-------
GPT_SoVITS/configs/tts_infer.yaml | 1 -
GPT_SoVITS/inference_webui_fast.py | 3 ++-
3 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py
index da6a6dfe..0c1d2484 100644
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@@ -304,10 +304,10 @@ class TTS_Config:
configs: dict = self._load_configs(self.configs_path)
assert isinstance(configs, dict)
- version = configs.get("version", "v2").lower()
- assert version in ["v1", "v2", "v3", "v4", "v2pro", "v2proplus"]
- self.default_configs[version] = configs.get(version, self.default_configs[version])
- self.configs: dict = configs.get("custom", deepcopy(self.default_configs[version]))
+ configs_ = deepcopy(self.default_configs)
+ configs_.update(configs)
+ self.configs: dict = configs_.get("custom", configs_["v2"])
+ self.default_configs = deepcopy(configs_)
self.device = self.configs.get("device", torch.device("cpu"))
if "cuda" in str(self.device) and not torch.cuda.is_available():
@@ -315,11 +315,13 @@ class TTS_Config:
self.device = torch.device("cpu")
self.is_half = self.configs.get("is_half", False)
- # if str(self.device) == "cpu" and self.is_half:
- # print(f"Warning: Half precision is not supported on CPU, set is_half to False.")
- # self.is_half = False
+ if str(self.device) == "cpu" and self.is_half:
+ print(f"Warning: Half precision is not supported on CPU, set is_half to False.")
+ self.is_half = False
+ version = self.configs.get("version", None)
self.version = version
+ assert self.version in ["v1", "v2", "v3", "v4", "v2Pro", "v2ProPlus"], "Invalid version!"
self.t2s_weights_path = self.configs.get("t2s_weights_path", None)
self.vits_weights_path = self.configs.get("vits_weights_path", None)
self.bert_base_path = self.configs.get("bert_base_path", None)
@@ -576,6 +578,10 @@ class TTS:
if self.configs.is_half and str(self.configs.device) != "cpu":
self.vits_model = self.vits_model.half()
+ self.configs.save_configs()
+
+
+
def init_t2s_weights(self, weights_path: str):
print(f"Loading Text2Semantic weights from {weights_path}")
self.configs.t2s_weights_path = weights_path
diff --git a/GPT_SoVITS/configs/tts_infer.yaml b/GPT_SoVITS/configs/tts_infer.yaml
index 531aeb51..f31061cc 100644
--- a/GPT_SoVITS/configs/tts_infer.yaml
+++ b/GPT_SoVITS/configs/tts_infer.yaml
@@ -1,4 +1,3 @@
-version: v2ProPlus
custom:
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py
index 2c159d81..51a120f1 100644
--- a/GPT_SoVITS/inference_webui_fast.py
+++ b/GPT_SoVITS/inference_webui_fast.py
@@ -125,7 +125,8 @@ is_exist_s2gv4 = os.path.exists(path_sovits_v4)
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
tts_config.device = device
tts_config.is_half = is_half
-tts_config.version = version
+# tts_config.version = version
+tts_config.update_version(version)
if gpt_path is not None:
if "!" in gpt_path or "!" in gpt_path:
gpt_path = name2gpt_path[gpt_path]
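The TTS_Config change in this patch replaces per-version lookups with a layered merge over the built-in defaults. A simplified sketch of that strategy follows (illustrative only, using a toy defaults table rather than the real preset dictionaries).

from copy import deepcopy

def resolve_configs(default_configs: dict, loaded: dict) -> dict:
    merged = deepcopy(default_configs)          # keep the built-in defaults intact
    merged.update(loaded)                       # file entries override per top-level key
    return merged.get("custom", merged["v2"])   # "custom" wins, otherwise fall back to v2

defaults = {"v2": {"device": "cpu", "version": "v2"}}
loaded = {"custom": {"device": "cuda", "version": "v2ProPlus"}}
print(resolve_configs(defaults, loaded))        # {'device': 'cuda', 'version': 'v2ProPlus'}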
From b5a67e62470fb87e7cea28ecad7c7c3bf7a58afd Mon Sep 17 00:00:00 2001
From: ChasonJiang <46401978+ChasonJiang@users.noreply.github.com>
Date: Fri, 18 Jul 2025 14:59:59 +0800
Subject: [PATCH 25/27] Fix the GPT loss computation issue (#2537)
* Fix the GPT loss computation issue
* fallback tts config
---
GPT_SoVITS/AR/models/t2s_model.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py
index 4725b7a3..7196d6ab 100644
--- a/GPT_SoVITS/AR/models/t2s_model.py
+++ b/GPT_SoVITS/AR/models/t2s_model.py
@@ -356,7 +356,7 @@ class Text2SemanticDecoder(nn.Module):
x = self.ar_text_embedding(x)
x = x + self.bert_proj(bert_feature.transpose(1, 2))
x = self.ar_text_position(x)
- x_mask = make_pad_mask(x_lens)
+ x_mask = make_pad_mask_left(x_lens)
y_mask = make_pad_mask(y_lens)
y_mask_int = y_mask.type(torch.int64)
@@ -420,7 +420,7 @@ class Text2SemanticDecoder(nn.Module):
mask=xy_attn_mask,
)
x_len = x_lens.max()
- logits = self.ar_predict_layer(xy_dec[:, x_len:])
+ logits = self.ar_predict_layer(xy_dec[:, x_len-1:])
###### DPO #############
reject_xy_pos, reject_xy_attn_mask, reject_targets = self.make_input_data(
@@ -432,7 +432,7 @@ class Text2SemanticDecoder(nn.Module):
mask=reject_xy_attn_mask,
)
x_len = x_lens.max()
- reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len:])
+ reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len-1:])
# loss
# from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum
@@ -455,7 +455,7 @@ class Text2SemanticDecoder(nn.Module):
x = self.ar_text_embedding(x)
x = x + self.bert_proj(bert_feature.transpose(1, 2))
x = self.ar_text_position(x)
- x_mask = make_pad_mask(x_lens)
+ x_mask = make_pad_mask_left(x_lens)
y_mask = make_pad_mask(y_lens)
y_mask_int = y_mask.type(torch.int64)
@@ -502,7 +502,7 @@ class Text2SemanticDecoder(nn.Module):
(xy_pos, None),
mask=xy_attn_mask,
)
- logits = self.ar_predict_layer(xy_dec[:, x_len:]).permute(0, 2, 1)
+ logits = self.ar_predict_layer(xy_dec[:, x_len-1:]).permute(0, 2, 1)
# loss
# from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum
loss = F.cross_entropy(logits, targets, reduction="sum")
@@ -578,7 +578,7 @@ class Text2SemanticDecoder(nn.Module):
def pad_y_eos(self, y, y_mask_int, eos_id):
targets = F.pad(y, (0, 1), value=0) + eos_id * F.pad(y_mask_int, (0, 1), value=1)
# 错位
- return targets[:, :-1], targets[:, 1:]
+ return targets[:, :-1], targets
def infer_panel_batch_infer(
self,
From 0be59c8043a12112934d474ff4cc65658d848e8f Mon Sep 17 00:00:00 2001
From: 多玩幻灵qwq
Date: Sat, 19 Jul 2025 00:29:48 +0800
Subject: [PATCH 26/27] fix: correct a link (#2539)
---
tools/assets.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/assets.py b/tools/assets.py
index 6851c064..b2c302fe 100644
--- a/tools/assets.py
+++ b/tools/assets.py
@@ -59,7 +59,7 @@ top_html = """
-
+
From fdf794e31d1fd6f91c5cb4fbb0396094491a31ac Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Sat, 2 Aug 2025 17:47:15 +0800
Subject: [PATCH 27/27] Update WSL Rocm (#2561)
---
install.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/install.sh b/install.sh
index a2fa751e..7d80ec28 100644
--- a/install.sh
+++ b/install.sh
@@ -373,7 +373,7 @@ if [ "$USE_ROCM" = true ] && [ "$IS_WSL" = true ]; then
location=$(pip show torch | grep Location | awk -F ": " '{print $2}')
cd "${location}"/torch/lib/ || exit
rm libhsa-runtime64.so*
- cp /opt/rocm/lib/libhsa-runtime64.so.1.2 libhsa-runtime64.so
+ cp "$(readlink -f /opt/rocm/lib/libhsa-runtime64.so)" libhsa-runtime64.so
echo -e "${SUCCESS}ROCm Runtime Lib Updated..."
fi
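The install.sh change copies whatever file the libhsa-runtime64.so symlink actually points to, instead of hard-coding the .1.2 version. A Python equivalent of that resolution step, purely illustrative since the patch itself relies on readlink -f:

import os

lib = "/opt/rocm/lib/libhsa-runtime64.so"
# os.path.realpath follows the symlink chain, like `readlink -f`,
# so the copied file tracks whatever runtime version ROCm ships.
print(os.path.realpath(lib))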