diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py index 3cae7b37..12afca21 100644 --- a/GPT_SoVITS/AR/models/t2s_model.py +++ b/GPT_SoVITS/AR/models/t2s_model.py @@ -854,6 +854,7 @@ class Text2SemanticDecoder(nn.Module): if idx == 0: xy_attn_mask = None + if(idx<11):###至少预测出10个token不然不给停止(0.4s) logits = logits[:, :-1] samples = sample( diff --git a/GPT_SoVITS/TTS_infer_pack/TTS.py b/GPT_SoVITS/TTS_infer_pack/TTS.py index c677d778..a1eeb28c 100644 --- a/GPT_SoVITS/TTS_infer_pack/TTS.py +++ b/GPT_SoVITS/TTS_infer_pack/TTS.py @@ -213,6 +213,10 @@ class TTS_Config: "cnhuhbert_base_path": self.cnhuhbert_base_path, } return self.config + + def update_version(self, version:str)->None: + self.version = version + self.languages = self.v2_languages if self.version=="v2" else self.v1_languages def __str__(self): self.configs = self.update_configs() @@ -300,13 +304,14 @@ class TTS: def init_vits_weights(self, weights_path: str): print(f"Loading VITS weights from {weights_path}") self.configs.vits_weights_path = weights_path - self.configs.save_configs() dict_s2 = torch.load(weights_path, map_location=self.configs.device) hps = dict_s2["config"] if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322: - self.configs.version = "v1" + self.configs.update_version("v1") else: - self.configs.version = "v2" + self.configs.update_version("v2") + self.configs.save_configs() + hps["model"]["version"] = self.configs.version self.configs.filter_length = hps["data"]["filter_length"] self.configs.segment_size = hps["train"]["segment_size"] diff --git a/GPT_SoVITS/module/models_onnx.py b/GPT_SoVITS/module/models_onnx.py index b39f4b85..c5d96d0c 100644 --- a/GPT_SoVITS/module/models_onnx.py +++ b/GPT_SoVITS/module/models_onnx.py @@ -16,6 +16,7 @@ from module.quantize import ResidualVectorQuantizer # from text import symbols from text import symbols as symbols_v1 from text import symbols2 as symbols_v2 +from torch.cuda.amp import autocast class StochasticDurationPredictor(nn.Module): diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py index 9570295c..f716b410 100644 --- a/GPT_SoVITS/text/chinese2.py +++ b/GPT_SoVITS/text/chinese2.py @@ -27,7 +27,7 @@ if is_g2pw: print("当前使用g2pw进行拼音推理") from text.g2pw import G2PWPinyin, correct_pronunciation parent_directory = os.path.dirname(current_file_path) - g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",v_to_u=False, neutral_tone_with_five=True) + g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True) rep_map = { ":": ",", diff --git a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep index 10b3a706..1800b886 100644 --- a/GPT_SoVITS/text/g2pw/polyphonic-fix.rep +++ b/GPT_SoVITS/text/g2pw/polyphonic-fix.rep @@ -45021,4 +45021,6 @@ 黄冠野服: ['huang2', 'guan4', 'ye3', 'fu2'] 黄发台背: ['huang2', 'fa1', 'tai2', 'bei4'] 鼎铛玉石: ['ding3', 'cheng1', 'yu4', 'shi2'] -齿豁头童: ['chi3', 'huo1', 'tou2', 'tong2'] \ No newline at end of file +齿豁头童: ['chi3', 'huo1', 'tou2', 'tong2'] +牦牛: ['mao2', 'niu2'] +牦: ['mao2'] \ No newline at end of file diff --git a/GPT_SoVITS/text/g2pw/polyphonic.pickle b/GPT_SoVITS/text/g2pw/polyphonic.pickle index be8aeebb..fbe46ce1 100644 Binary files a/GPT_SoVITS/text/g2pw/polyphonic.pickle and b/GPT_SoVITS/text/g2pw/polyphonic.pickle differ diff --git a/GPT_SoVITS/text/zh_normalization/num.py b/GPT_SoVITS/text/zh_normalization/num.py index 356acdea..c0460a03 100644 --- a/GPT_SoVITS/text/zh_normalization/num.py +++ b/GPT_SoVITS/text/zh_normalization/num.py @@ -186,6 +186,7 @@ def replace_positive_quantifier(match) -> str: match_2: str = match_2 if match_2 else "" quantifiers: str = match.group(3) number: str = num2str(number) + number = "两" if number == "二" else number result = f"{number}{match_2}{quantifiers}" return result diff --git a/README.md b/README.md index 0d38288e..1fa36a79 100644 --- a/README.md +++ b/README.md @@ -184,8 +184,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### Integrated Package Users - Double-click `go-webui.bat`or use `go-webui.ps` - if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps` + Double-click `go-webui.bat`or use `go-webui.ps1` + if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps1` #### Others @@ -220,7 +220,7 @@ Or maunally switch version in WebUI #### Integrated Package Users - Double-click `go-webui-v2.bat` or use `go-webui-v2.ps` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference` + Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference` #### Others diff --git a/api_v2.py b/api_v2.py index a9faaebe..ea1d0c7f 100644 --- a/api_v2.py +++ b/api_v2.py @@ -253,13 +253,13 @@ def check_params(req:dict): if (text_lang in [None, ""]) : return JSONResponse(status_code=400, content={"message": "text_lang is required"}) elif text_lang.lower() not in tts_config.languages: - return JSONResponse(status_code=400, content={"message": "text_lang is not supported"}) + return JSONResponse(status_code=400, content={"message": f"text_lang: {text_lang} is not supported in version {tts_config.version}"}) if (prompt_lang in [None, ""]) : return JSONResponse(status_code=400, content={"message": "prompt_lang is required"}) elif prompt_lang.lower() not in tts_config.languages: - return JSONResponse(status_code=400, content={"message": "prompt_lang is not supported"}) + return JSONResponse(status_code=400, content={"message": f"prompt_lang: {prompt_lang} is not supported in version {tts_config.version}"}) if media_type not in ["wav", "raw", "ogg", "aac"]: - return JSONResponse(status_code=400, content={"message": "media_type is not supported"}) + return JSONResponse(status_code=400, content={"message": f"media_type: {media_type} is not supported"}) elif media_type == "ogg" and not streaming_mode: return JSONResponse(status_code=400, content={"message": "ogg format is not supported in non-streaming mode"}) diff --git a/docs/cn/README.md b/docs/cn/README.md index d2f45b3b..a0631283 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -181,8 +181,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|zh|我爱玩原神。 #### 整合包用户 -双击`go-webui.bat`或者使用`go-webui.ps` -若想使用V1,则双击`go-webui-v1.bat`或者使用`go-webui-v1.ps` +双击`go-webui.bat`或者使用`go-webui.ps1` +若想使用V1,则双击`go-webui-v1.bat`或者使用`go-webui-v1.ps1` #### 其他 @@ -217,7 +217,7 @@ python webui.py v1 #### 整合包用户 -双击 `go-webui.bat` 或者使用 `go-webui.ps` ,然后在 `1-GPT-SoVITS-TTS/1C-推理` 中打开推理webUI +双击 `go-webui.bat` 或者使用 `go-webui.ps1` ,然后在 `1-GPT-SoVITS-TTS/1C-推理` 中打开推理webUI #### 其他 diff --git a/docs/ja/README.md b/docs/ja/README.md index 5d01fad9..fd8274aa 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -171,8 +171,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### 統合パッケージ利用者 -`go-webui.bat`をダブルクリックするか、`go-webui.ps`を使用します。 -V1に切り替えたい場合は、`go-webui-v1.bat`をダブルクリックするか、`go-webui-v1.ps`を使用してください。 +`go-webui.bat`をダブルクリックするか、`go-webui.ps1`を使用します。 +V1に切り替えたい場合は、`go-webui-v1.bat`をダブルクリックするか、`go-webui-v1.ps1`を使用してください。 #### その他 @@ -207,7 +207,7 @@ python webui.py v1 <言語(オプション)> #### 統合パッケージ利用者 -`go-webui-v2.bat`をダブルクリックするか、`go-webui-v2.ps`を使用して、`1-GPT-SoVITS-TTS/1C-inference`で推論webuiを開きます。 +`go-webui-v2.bat`をダブルクリックするか、`go-webui-v2.ps1`を使用して、`1-GPT-SoVITS-TTS/1C-inference`で推論webuiを開きます。 #### その他 diff --git a/docs/ko/README.md b/docs/ko/README.md index c0ac9a2d..207792df 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -175,8 +175,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### 통합 패키지 사용자 -`go-webui.bat`을 더블 클릭하거나 `go-webui.ps`를 사용하십시오. -V1으로 전환하려면, `go-webui-v1.bat`을 더블 클릭하거나 `go-webui-v1.ps`를 사용하십시오. +`go-webui.bat`을 더블 클릭하거나 `go-webui.ps1`를 사용하십시오. +V1으로 전환하려면, `go-webui-v1.bat`을 더블 클릭하거나 `go-webui-v1.ps1`를 사용하십시오. #### 기타 @@ -211,7 +211,7 @@ python webui.py v1 <언어(옵션)> #### 통합 패키지 사용자 -`go-webui-v2.bat`을 더블 클릭하거나 `go-webui-v2.ps`를 사용한 다음 `1-GPT-SoVITS-TTS/1C-inference`에서 추론 webui를 엽니다. +`go-webui-v2.bat`을 더블 클릭하거나 `go-webui-v2.ps1`를 사용한 다음 `1-GPT-SoVITS-TTS/1C-inference`에서 추론 webui를 엽니다. #### 기타 diff --git a/docs/tr/README.md b/docs/tr/README.md index 6eddaeb7..5632cc8e 100644 --- a/docs/tr/README.md +++ b/docs/tr/README.md @@ -172,8 +172,8 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. #### Entegre Paket Kullanıcıları -`go-webui.bat` dosyasına çift tıklayın veya `go-webui.ps` kullanın. -V1'e geçmek istiyorsanız, `go-webui-v1.bat` dosyasına çift tıklayın veya `go-webui-v1.ps` kullanın. +`go-webui.bat` dosyasına çift tıklayın veya `go-webui.ps1` kullanın. +V1'e geçmek istiyorsanız, `go-webui-v1.bat` dosyasına çift tıklayın veya `go-webui-v1.ps1` kullanın. #### Diğerleri @@ -208,7 +208,7 @@ veya WebUI'de manuel olarak sürüm değiştirin. #### Entegre Paket Kullanıcıları -`go-webui-v2.bat` dosyasına çift tıklayın veya `go-webui-v2.ps` kullanın, ardından çıkarım webui'sini `1-GPT-SoVITS-TTS/1C-inference` adresinde açın. +`go-webui-v2.bat` dosyasına çift tıklayın veya `go-webui-v2.ps1` kullanın, ardından çıkarım webui'sini `1-GPT-SoVITS-TTS/1C-inference` adresinde açın. #### Diğerleri @@ -330,4 +330,4 @@ python ./tools/asr/fasterwhisper_asr.py -i -o <çıktı> -l - \ No newline at end of file + diff --git a/requirements.txt b/requirements.txt index fe9c18fa..280d9d99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ ko_pron opencc; sys_platform != 'linux' opencc==1.1.1; sys_platform == 'linux' python_mecab_ko; sys_platform != 'win32' +fastapi<0.112.2