From f35f6e9b5e529f7bd3c6db36d163a97715ce5f24 Mon Sep 17 00:00:00 2001 From: ChasonJiang <46401978+ChasonJiang@users.noreply.github.com> Date: Thu, 29 Aug 2024 00:33:07 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E4=BC=98=E5=8C=96tts=5Fconfig=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E9=80=BB=E8=BE=91=20(#1538)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 优化tts_config * fix * 优化报错提示 * 优化报错提示 --- GPT_SoVITS/TTS_infer_pack/TTS.py | 11 ++++++++--- api_v2.py | 6 +++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py index c677d778..a1eeb28c 100644 --- a/GPT_SoVITS/TTS_infer_pack/TTS.py +++ b/GPT_SoVITS/TTS_infer_pack/TTS.py @@ -213,6 +213,10 @@ class TTS_Config: "cnhuhbert_base_path": self.cnhuhbert_base_path, } return self.config + + def update_version(self, version:str)->None: + self.version = version + self.languages = self.v2_languages if self.version=="v2" else self.v1_languages def __str__(self): self.configs = self.update_configs() @@ -300,13 +304,14 @@ class TTS: def init_vits_weights(self, weights_path: str): print(f"Loading VITS weights from {weights_path}") self.configs.vits_weights_path = weights_path - self.configs.save_configs() dict_s2 = torch.load(weights_path, map_location=self.configs.device) hps = dict_s2["config"] if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322: - self.configs.version = "v1" + self.configs.update_version("v1") else: - self.configs.version = "v2" + self.configs.update_version("v2") + self.configs.save_configs() + hps["model"]["version"] = self.configs.version self.configs.filter_length = hps["data"]["filter_length"] self.configs.segment_size = hps["train"]["segment_size"] diff --git a/api_v2.py b/api_v2.py index a9faaebe..ea1d0c7f 100644 --- a/api_v2.py +++ b/api_v2.py @@ -253,13 +253,13 @@ def check_params(req:dict): if (text_lang in [None, ""]) : return JSONResponse(status_code=400, content={"message": "text_lang is required"}) elif text_lang.lower() not in tts_config.languages: - return JSONResponse(status_code=400, content={"message": "text_lang is not supported"}) + return JSONResponse(status_code=400, content={"message": f"text_lang: {text_lang} is not supported in version {tts_config.version}"}) if (prompt_lang in [None, ""]) : return JSONResponse(status_code=400, content={"message": "prompt_lang is required"}) elif prompt_lang.lower() not in tts_config.languages: - return JSONResponse(status_code=400, content={"message": "prompt_lang is not supported"}) + return JSONResponse(status_code=400, content={"message": f"prompt_lang: {prompt_lang} is not supported in version {tts_config.version}"}) if media_type not in ["wav", "raw", "ogg", "aac"]: - return JSONResponse(status_code=400, content={"message": "media_type is not supported"}) + return JSONResponse(status_code=400, content={"message": f"media_type: {media_type} is not supported"}) elif media_type == "ogg" and not streaming_mode: return JSONResponse(status_code=400, content={"message": "ogg format is not supported in non-streaming mode"}) From d67bbd21668b401b28a004083ef0bf24d9dadd1d Mon Sep 17 00:00:00 2001 From: KakaruHayate <97896816+KakaruHayate@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:17:42 +0800 Subject: [PATCH 2/8] Fix typo (#1568) * typo * typo * Update README.md * Update README.md * typo --- README.md | 6 +++--- docs/cn/README.md | 6 +++--- docs/ja/README.md | 6 +++--- docs/ko/README.md | 6 +++--- docs/tr/README.md | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 0d38288e..1fa36a79 100644 --- a/README.md +++ b/README.md @@ -184,8 +184,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### Integrated Package Users - Double-click `go-webui.bat`or use `go-webui.ps` - if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps` + Double-click `go-webui.bat`or use `go-webui.ps1` + if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps1` #### Others @@ -220,7 +220,7 @@ Or maunally switch version in WebUI #### Integrated Package Users - Double-click `go-webui-v2.bat` or use `go-webui-v2.ps` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference` + Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference` #### Others diff --git a/docs/cn/README.md b/docs/cn/README.md index d2f45b3b..a0631283 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -181,8 +181,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|zh|我爱玩原神。 #### 整合包用户 -双击`go-webui.bat`或者使用`go-webui.ps` -若想使用V1,则双击`go-webui-v1.bat`或者使用`go-webui-v1.ps` +双击`go-webui.bat`或者使用`go-webui.ps1` +若想使用V1,则双击`go-webui-v1.bat`或者使用`go-webui-v1.ps1` #### 其他 @@ -217,7 +217,7 @@ python webui.py v1 #### 整合包用户 -双击 `go-webui.bat` 或者使用 `go-webui.ps` ,然后在 `1-GPT-SoVITS-TTS/1C-推理` 中打开推理webUI +双击 `go-webui.bat` 或者使用 `go-webui.ps1` ,然后在 `1-GPT-SoVITS-TTS/1C-推理` 中打开推理webUI #### 其他 diff --git a/docs/ja/README.md b/docs/ja/README.md index 5d01fad9..fd8274aa 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -171,8 +171,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### 統合パッケージ利用者 -`go-webui.bat`をダブルクリックするか、`go-webui.ps`を使用します。 -V1に切り替えたい場合は、`go-webui-v1.bat`をダブルクリックするか、`go-webui-v1.ps`を使用してください。 +`go-webui.bat`をダブルクリックするか、`go-webui.ps1`を使用します。 +V1に切り替えたい場合は、`go-webui-v1.bat`をダブルクリックするか、`go-webui-v1.ps1`を使用してください。 #### その他 @@ -207,7 +207,7 @@ python webui.py v1 <言語(オプション)> #### 統合パッケージ利用者 -`go-webui-v2.bat`をダブルクリックするか、`go-webui-v2.ps`を使用して、`1-GPT-SoVITS-TTS/1C-inference`で推論webuiを開きます。 +`go-webui-v2.bat`をダブルクリックするか、`go-webui-v2.ps1`を使用して、`1-GPT-SoVITS-TTS/1C-inference`で推論webuiを開きます。 #### その他 diff --git a/docs/ko/README.md b/docs/ko/README.md index c0ac9a2d..207792df 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -175,8 +175,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### 통합 패키지 사용자 -`go-webui.bat`을 더블 클릭하거나 `go-webui.ps`를 사용하십시오. -V1으로 전환하려면, `go-webui-v1.bat`을 더블 클릭하거나 `go-webui-v1.ps`를 사용하십시오. +`go-webui.bat`을 더블 클릭하거나 `go-webui.ps1`를 사용하십시오. +V1으로 전환하려면, `go-webui-v1.bat`을 더블 클릭하거나 `go-webui-v1.ps1`를 사용하십시오. #### 기타 @@ -211,7 +211,7 @@ python webui.py v1 <언어(옵션)> #### 통합 패키지 사용자 -`go-webui-v2.bat`을 더블 클릭하거나 `go-webui-v2.ps`를 사용한 다음 `1-GPT-SoVITS-TTS/1C-inference`에서 추론 webui를 엽니다. +`go-webui-v2.bat`을 더블 클릭하거나 `go-webui-v2.ps1`를 사용한 다음 `1-GPT-SoVITS-TTS/1C-inference`에서 추론 webui를 엽니다. #### 기타 diff --git a/docs/tr/README.md b/docs/tr/README.md index 6eddaeb7..5632cc8e 100644 --- a/docs/tr/README.md +++ b/docs/tr/README.md @@ -172,8 +172,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### Entegre Paket Kullanıcıları -`go-webui.bat` dosyasına çift tıklayın veya `go-webui.ps` kullanın. -V1'e geçmek istiyorsanız, `go-webui-v1.bat` dosyasına çift tıklayın veya `go-webui-v1.ps` kullanın. +`go-webui.bat` dosyasına çift tıklayın veya `go-webui.ps1` kullanın. +V1'e geçmek istiyorsanız, `go-webui-v1.bat` dosyasına çift tıklayın veya `go-webui-v1.ps1` kullanın. #### Diğerleri @@ -208,7 +208,7 @@ veya WebUI'de manuel olarak sürüm değiştirin. #### Entegre Paket Kullanıcıları -`go-webui-v2.bat` dosyasına çift tıklayın veya `go-webui-v2.ps` kullanın, ardından çıkarım webui'sini `1-GPT-SoVITS-TTS/1C-inference` adresinde açın. +`go-webui-v2.bat` dosyasına çift tıklayın veya `go-webui-v2.ps1` kullanın, ardından çıkarım webui'sini `1-GPT-SoVITS-TTS/1C-inference` adresinde açın. #### Diğerleri @@ -330,4 +330,4 @@ python ./tools/asr/fasterwhisper_asr.py -i -o <çıktı> -l - \ No newline at end of file + From 3488cffd68752ce72f91d581585770ff18c9a1b3 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Sat, 7 Sep 2024 15:15:14 +0800 Subject: [PATCH 3/8] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index fe9c18fa..280d9d99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ ko_pron opencc; sys_platform != 'linux' opencc==1.1.1; sys_platform == 'linux' python_mecab_ko; sys_platform != 'win32' +fastapi<0.112.2 From 40cd22e69d439954a74914855f05e9c4c3ab26da Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 10 Sep 2024 19:13:42 +0800 Subject: [PATCH 4/8] Update t2s_model.py --- GPT_SoVITS/AR/models/t2s_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py index 31acadcc..fb528914 100644 --- a/GPT_SoVITS/AR/models/t2s_model.py +++ b/GPT_SoVITS/AR/models/t2s_model.py @@ -854,6 +854,7 @@ class Text2SemanticDecoder(nn.Module): if idx == 0: xy_attn_mask = None + if(idx<11):###至少预测出10个token不然不给停止(0.4s) logits = logits[:, :-1] samples = sample( From 570da092c9da2466e05e4e31eca67f44baa24fd4 Mon Sep 17 00:00:00 2001 From: KamioRinn <63162909+KamioRinn@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:26:33 +0800 Subject: [PATCH 5/8] Fix maoniu (#1605) * Fix maoniu * Fix maon --- GPT_SoVITS/text/g2pw/polyphonic-fix.rep | 4 +++- GPT_SoVITS/text/g2pw/polyphonic.pickle | Bin 1322350 -> 1322387 bytes 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep index 10b3a706..1800b886 100644 --- a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep +++ b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep @@ -45021,4 +45021,6 @@ 黄冠野服: ['huang2', 'guan4', 'ye3', 'fu2'] 黄发台背: ['huang2', 'fa1', 'tai2', 'bei4'] 鼎铛玉石: ['ding3', 'cheng1', 'yu4', 'shi2'] -齿豁头童: ['chi3', 'huo1', 'tou2', 'tong2'] \ No newline at end of file +齿豁头童: ['chi3', 'huo1', 'tou2', 'tong2'] +牦牛: ['mao2', 'niu2'] +牦: ['mao2'] \ No newline at end of file diff --git a/GPT_SoVITS/text/g2pw/polyphonic.pickle b/GPT_SoVITS/text/g2pw/polyphonic.pickle index be8aeebbf21c83998c43eab6761b269257561639..fbe46ce1dcc498e4cc508f5a68e77bee3428a393 100644 GIT binary patch delta 95 zcmaEND`4{NfQA;v7N!>F7M2#)7Pc1l7LFFq7OocV7M>Q~7QPn#7J(MQ7NHj5DI(t) tr)>QpQlg{Y!}h#$+4IiXQ(~uRWaY9jFl3zt5~)4RKw+R5SSYbn4*-z5B7pz^ delta 58 zcmbPyJK)`|fQA;v7N!>F7M2#)7Pc1l7LFFq7OocV7M>Q~7QPn#7J(MQ7NHj5DI(t) O^SAyGDbZmn)dK)-D--qr From 0c000191b3312fb7430909bc1db17beacacb63f5 Mon Sep 17 00:00:00 2001 From: zzz <458761603@qq.com> Date: Fri, 13 Sep 2024 11:27:22 +0800 Subject: [PATCH 6/8] Fix onnx_export to support v2 (#1604) --- GPT_SoVITS/module/models_onnx.py | 27 +++++++++++--- GPT_SoVITS/onnx_export.py | 62 ++++++++++++++++++-------------- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/GPT_SoVITS/module/models_onnx.py b/GPT_SoVITS/module/models_onnx.py index 232fd74d..77ae3074 100644 --- a/GPT_SoVITS/module/models_onnx.py +++ b/GPT_SoVITS/module/models_onnx.py @@ -13,7 +13,9 @@ from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm from module.commons import init_weights, get_padding from module.mrte_model import MRTE from module.quantize import ResidualVectorQuantizer -from text import symbols +# from text import symbols +from text import symbols as symbols_v1 +from text import symbols2 as symbols_v2 from torch.cuda.amp import autocast @@ -182,6 +184,7 @@ class TextEncoder(nn.Module): kernel_size, p_dropout, latent_channels=192, + version="v2", ): super().__init__() self.out_channels = out_channels @@ -192,6 +195,7 @@ class TextEncoder(nn.Module): self.kernel_size = kernel_size self.p_dropout = p_dropout self.latent_channels = latent_channels + self.version = version self.ssl_proj = nn.Conv1d(768, hidden_channels, 1) @@ -207,6 +211,11 @@ class TextEncoder(nn.Module): self.encoder_text = attentions.Encoder( hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout ) + + if self.version == "v1": + symbols = symbols_v1.symbols + else: + symbols = symbols_v2.symbols self.text_embedding = nn.Embedding(len(symbols), hidden_channels) self.mrte = MRTE() @@ -817,6 +826,7 @@ class SynthesizerTrn(nn.Module): use_sdp=True, semantic_frame_rate=None, freeze_quantizer=None, + version="v2", **kwargs ): super().__init__() @@ -837,6 +847,7 @@ class SynthesizerTrn(nn.Module): self.segment_size = segment_size self.n_speakers = n_speakers self.gin_channels = gin_channels + self.version = version self.use_sdp = use_sdp self.enc_p = TextEncoder( @@ -847,6 +858,7 @@ class SynthesizerTrn(nn.Module): n_layers, kernel_size, p_dropout, + version=version, ) self.dec = Generator( inter_channels, @@ -871,9 +883,11 @@ class SynthesizerTrn(nn.Module): inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels ) - self.ref_enc = modules.MelStyleEncoder( - spec_channels, style_vector_dim=gin_channels - ) + # self.version=os.environ.get("version","v1") + if self.version == "v1": + self.ref_enc = modules.MelStyleEncoder(spec_channels, style_vector_dim=gin_channels) + else: + self.ref_enc = modules.MelStyleEncoder(704, style_vector_dim=gin_channels) ssl_dim = 768 self.ssl_dim = ssl_dim @@ -894,7 +908,10 @@ class SynthesizerTrn(nn.Module): def forward(self, codes, text, refer): refer_mask = torch.ones_like(refer[:1,:1,:]) - ge = self.ref_enc(refer * refer_mask, refer_mask) + if (self.version == "v1"): + ge = self.ref_enc(refer * refer_mask, refer_mask) + else: + ge = self.ref_enc(refer[:, :704] * refer_mask, refer_mask) quantized = self.quantizer.decode(codes) if self.semantic_frame_rate == "25hz": diff --git a/GPT_SoVITS/onnx_export.py b/GPT_SoVITS/onnx_export.py index ab457d75..43aac19a 100644 --- a/GPT_SoVITS/onnx_export.py +++ b/GPT_SoVITS/onnx_export.py @@ -1,11 +1,12 @@ -from module.models_onnx import SynthesizerTrn, symbols +from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2 from AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule import torch import torchaudio from torch import nn from feature_extractor import cnhubert -cnhubert_base_path = "pretrained_models/chinese-hubert-base" -cnhubert.cnhubert_base_path=cnhubert_base_path + +cnhubert_base_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base" +cnhubert.cnhubert_base_path = cnhubert_base_path ssl_model = cnhubert.get_model() from text import cleaned_text_to_sequence import soundfile @@ -196,6 +197,11 @@ class VitsModel(nn.Module): super().__init__() dict_s2 = torch.load(vits_path,map_location="cpu") self.hps = dict_s2["config"] + if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322: + self.hps["model"]["version"] = "v1" + else: + self.hps["model"]["version"] = "v2" + self.hps = DictToAttrRecursive(self.hps) self.hps.model.semantic_frame_rate = "25hz" self.vq_model = SynthesizerTrn( @@ -267,13 +273,13 @@ class SSLModel(nn.Module): return self.ssl.model(ref_audio_16k)["last_hidden_state"].transpose(1, 2) -def export(vits_path, gpt_path, project_name): +def export(vits_path, gpt_path, project_name, vits_model="v2"): vits = VitsModel(vits_path) gpt = T2SModel(gpt_path, vits) gpt_sovits = GptSoVits(vits, gpt) ssl = SSLModel() - ref_seq = torch.LongTensor([cleaned_text_to_sequence(["n", "i2", "h", "ao3", ",", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"])]) - text_seq = torch.LongTensor([cleaned_text_to_sequence(["w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"])]) + ref_seq = torch.LongTensor([cleaned_text_to_sequence(["n", "i2", "h", "ao3", ",", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"],version=vits_model)]) + text_seq = torch.LongTensor([cleaned_text_to_sequence(["w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"],version=vits_model)]) ref_bert = torch.randn((ref_seq.shape[1], 1024)).float() text_bert = torch.randn((text_seq.shape[1], 1024)).float() ref_audio = torch.randn((1, 48000 * 5)).float() @@ -287,34 +293,38 @@ def export(vits_path, gpt_path, project_name): pass ssl_content = ssl(ref_audio_16k).float() - - debug = False + + # debug = False + debug = True + + # gpt_sovits.export(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, project_name) if debug: a, b = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, debug=debug) soundfile.write("out1.wav", a.cpu().detach().numpy(), vits.hps.data.sampling_rate) soundfile.write("out2.wav", b[0], vits.hps.data.sampling_rate) - return - - a = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content).detach().cpu().numpy() + else: + a = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content).detach().cpu().numpy() + soundfile.write("out.wav", a, vits.hps.data.sampling_rate) - soundfile.write("out.wav", a, vits.hps.data.sampling_rate) - - gpt_sovits.export(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, project_name) + if vits_model == "v1": + symbols = symbols_v1 + else: + symbols = symbols_v2 MoeVSConf = { - "Folder" : f"{project_name}", - "Name" : f"{project_name}", - "Type" : "GPT-SoVits", - "Rate" : vits.hps.data.sampling_rate, - "NumLayers": gpt.t2s_model.num_layers, - "EmbeddingDim": gpt.t2s_model.embedding_dim, - "Dict": "BasicDict", - "BertPath": "chinese-roberta-wwm-ext-large", - "Symbol": symbols, - "AddBlank": False - } - + "Folder": f"{project_name}", + "Name": f"{project_name}", + "Type": "GPT-SoVits", + "Rate": vits.hps.data.sampling_rate, + "NumLayers": gpt.t2s_model.num_layers, + "EmbeddingDim": gpt.t2s_model.embedding_dim, + "Dict": "BasicDict", + "BertPath": "chinese-roberta-wwm-ext-large", + # "Symbol": symbols, + "AddBlank": False, + } + MoeVSConfJson = json.dumps(MoeVSConf) with open(f"onnx/{project_name}.json", 'w') as MoeVsConfFile: json.dump(MoeVSConf, MoeVsConfFile, indent = 4) From 192ea6f6c99e44cac62f7ee24e9521c05994d105 Mon Sep 17 00:00:00 2001 From: KamioRinn <63162909+KamioRinn@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:08:57 +0800 Subject: [PATCH 7/8] Add Liang (#1613) --- GPT_SoVITS/text/zh_normalization/num.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPT_SoVITS/text/zh_normalization/num.py b/GPT_SoVITS/text/zh_normalization/num.py index 356acdea..c0460a03 100644 --- a/GPT_SoVITS/text/zh_normalization/num.py +++ b/GPT_SoVITS/text/zh_normalization/num.py @@ -186,6 +186,7 @@ def replace_positive_quantifier(match) -> str: match_2: str = match_2 if match_2 else "" quantifiers: str = match.group(3) number: str = num2str(number) + number = "两" if number == "二" else number result = f"{number}{match_2}{quantifiers}" return result From 78c68d46cb969628695d57c4f88b6bceadabb5fe Mon Sep 17 00:00:00 2001 From: Spr_Aachen <51275522+Spr-Aachen@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:09:58 +0800 Subject: [PATCH 8/8] Fix model_source path (#1619) --- GPT_SoVITS/text/chinese2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py index 9570295c..f716b410 100644 --- a/GPT_SoVITS/text/chinese2.py +++ b/GPT_SoVITS/text/chinese2.py @@ -27,7 +27,7 @@ if is_g2pw: print("当前使用g2pw进行拼音推理") from text.g2pw import G2PWPinyin, correct_pronunciation parent_directory = os.path.dirname(current_file_path) - g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",v_to_u=False, neutral_tone_with_five=True) + g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True) rep_map = { ":": ",",