Compare commits

7 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Karasukaigan | e78aada32e | Merge 539ad38d310d452aa1a08e659d5eac22f843a01d into c767f0b83b998e996a4d230d86da575a03f54a3f | 2026-01-05 00:00:20 -08:00 |
| ChasonJiang | c767f0b83b | Fix bug (#2704): fix bug; fallback and bug fix | 2025-12-30 16:00:21 +08:00 |
| ChasonJiang | 9080a967d5 | Fix sampling error (#2703) | 2025-12-30 15:21:03 +08:00 |
| sushistack | 51df9f7384 | Fix model file name in README instructions (#2700) | 2025-12-25 16:44:21 +08:00 |
| ChasonJiang | bfca0f6b2d | Align decoding strategy with naive_infer to prevent dropped sentences (#2697) | 2025-12-19 17:37:19 +08:00 |
| ChasonJiang | abe984395c | Align GPT top_k default sampling parameters (#2696) | 2025-12-19 16:05:36 +08:00 |
| RVC-Boss | cc89c3660e | Update requirements.txt | 2025-12-19 15:54:54 +08:00 |
6 changed files with 12 additions and 10 deletions

@@ -707,10 +707,12 @@ class Text2SemanticDecoder(nn.Module):
             if idx == 0:
                 attn_mask = F.pad(attn_mask[:, :, -1].unsqueeze(-2), (0, 1), value=False)
                 logits = logits[:, :-1]
             else:
                 attn_mask = F.pad(attn_mask, (0, 1), value=False)
+            if idx < 11:  ### require at least 10 predicted tokens (~0.4s) before allowing the stop token
+                logits = logits[:, :-1]
             samples = sample(
                 logits, y, top_k=top_k, top_p=top_p, repetition_penalty=repetition_penalty, temperature=temperature
             )[0]
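The two added lines keep the decoder from stopping before it has produced at least ten semantic tokens (about 0.4 s of audio), which is what prevents premature cut-offs on short inputs. A minimal sketch of the same idea, assuming as the diff does that the stop token occupies the last vocabulary column; the helper name and `min_tokens` parameter are illustrative, not from the repository:

```python
import torch

def mask_early_stop(logits: torch.Tensor, step: int, min_tokens: int = 10) -> torch.Tensor:
    # Drop the stop-token column (assumed to be the last vocabulary entry,
    # matching the `logits[:, :-1]` slicing in the diff) until at least
    # `min_tokens` tokens have been generated.
    if step < min_tokens + 1:  # mirrors the `idx < 11` guard above
        return logits[:, :-1]
    return logits
```

Slicing the column out, rather than setting it to `-inf`, means the subsequent `top_k`/`top_p` filtering never sees the stop token at all.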

@@ -1008,7 +1008,7 @@ class TTS:
     "aux_ref_audio_paths": [], # list.(optional) auxiliary reference audio paths for multi-speaker tone fusion
     "prompt_text": "", # str.(optional) prompt text for the reference audio
     "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
-    "top_k": 5, # int. top k sampling
+    "top_k": 15, # int. top k sampling
     "top_p": 1, # float. top p sampling
     "temperature": 1, # float. temperature for sampling
     "text_split_method": "cut1", # str. text split method, see text_segmentation_method.py for details.
@@ -1039,7 +1039,7 @@ class TTS:
     aux_ref_audio_paths: list = inputs.get("aux_ref_audio_paths", [])
     prompt_text: str = inputs.get("prompt_text", "")
     prompt_lang: str = inputs.get("prompt_lang", "")
-    top_k: int = inputs.get("top_k", 5)
+    top_k: int = inputs.get("top_k", 15)
     top_p: float = inputs.get("top_p", 1)
     temperature: float = inputs.get("temperature", 1)
     text_split_method: str = inputs.get("text_split_method", "cut1")
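Both hunks above raise the fallback `top_k` from 5 to 15, in line with commit abe984395c ("Align GPT top_k default sampling parameters"). Because the value is read via `inputs.get("top_k", 15)`, any request dict that omits the key now samples with 15. A hedged sketch of such a request; field names other than `top_k` follow the docstring shown above, and the required fields are assumptions, not verified against the full file:

```python
# Illustrative request dict: "top_k" is deliberately omitted, so
# inputs.get("top_k", 15) now yields the new default of 15 instead of 5.
req = {
    "text": "Hello, world.",      # text to synthesize (assumed required field)
    "text_lang": "en",            # language of the text (assumed)
    "ref_audio_path": "ref.wav",  # reference audio path (assumed)
    "prompt_lang": "en",          # language of the prompt text
}
```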

@@ -385,7 +385,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
     minimum=0.6, maximum=1.65, step=0.05, label="语速", value=1.0, interactive=True
 )
 with gr.Row():
-    top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n("top_k"), value=5, interactive=True)
+    top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n("top_k"), value=15, interactive=True)
     top_p = gr.Slider(minimum=0, maximum=1, step=0.05, label=i18n("top_p"), value=1, interactive=True)
 with gr.Row():
     temperature = gr.Slider(
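In Gradio, `value=` only sets a slider's initial position, so this change moves the WebUI default to 15 without narrowing the selectable 1-100 range. A standalone sketch, not the repository's actual layout:

```python
import gradio as gr

# Minimal demo: the top_k slider starts at the new default of 15,
# but users can still choose any value from 1 to 100.
with gr.Blocks() as demo:
    top_k = gr.Slider(minimum=1, maximum=100, step=1, label="top_k", value=15, interactive=True)
```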

@@ -347,7 +347,7 @@ Use v4 from v1/v2/v3 environment:
 2. Clone the latest codes from github.
-3. Download v4 pretrained models (gsv-v4-pretrained/s2v4.ckpt, and gsv-v4-pretrained/vocoder.pth) from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) and put them into `GPT_SoVITS/pretrained_models`.
+3. Download v4 pretrained models (gsv-v4-pretrained/s2v4.pth, and gsv-v4-pretrained/vocoder.pth) from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) and put them into `GPT_SoVITS/pretrained_models`.
 ## V2Pro Release Notes
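For anyone scripting the corrected download step, a hedged sketch using `huggingface_hub` (the README itself only describes a manual download; the repo id and file paths are taken from the fixed line above):

```python
from huggingface_hub import hf_hub_download

# Download the v4 pretrained files into the folder the README expects.
for name in ["gsv-v4-pretrained/s2v4.pth", "gsv-v4-pretrained/vocoder.pth"]:
    hf_hub_download(
        repo_id="lj1995/GPT-SoVITS",
        filename=name,
        local_dir="GPT_SoVITS/pretrained_models",
    )
```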

@@ -27,7 +27,7 @@ POST:
     "aux_ref_audio_paths": [], # list.(optional) auxiliary reference audio paths for multi-speaker tone fusion
     "prompt_text": "", # str.(optional) prompt text for the reference audio
     "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
-    "top_k": 5, # int. top k sampling
+    "top_k": 15, # int. top k sampling
     "top_p": 1, # float. top p sampling
     "temperature": 1, # float. temperature for sampling
     "text_split_method": "cut5", # str. text split method, see text_segmentation_method.py for details.
@@ -158,7 +158,7 @@ class TTS_Request(BaseModel):
     aux_ref_audio_paths: list = None
     prompt_lang: str = None
     prompt_text: str = ""
-    top_k: int = 5
+    top_k: int = 15
     top_p: float = 1
     temperature: float = 1
     text_split_method: str = "cut5"
@@ -355,7 +355,7 @@ async def tts_handle(req: dict):
     "aux_ref_audio_paths": [], # list.(optional) auxiliary reference audio paths for multi-speaker tone fusion
     "prompt_text": "", # str.(optional) prompt text for the reference audio
     "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
-    "top_k": 5, # int. top k sampling
+    "top_k": 15, # int. top k sampling
     "top_p": 1, # float. top p sampling
     "temperature": 1, # float. temperature for sampling
     "text_split_method": "cut5", # str. text split method, see text_segmentation_method.py for details.
@@ -460,7 +460,7 @@ async def tts_get_endpoint(
     aux_ref_audio_paths: list = None,
     prompt_lang: str = None,
     prompt_text: str = "",
-    top_k: int = 5,
+    top_k: int = 15,
     top_p: float = 1,
     temperature: float = 1,
     text_split_method: str = "cut5",
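All four hunks move the same default, so both the POST body model and the GET query parameters now fall back to `top_k=15` when the client sends nothing. A hedged client sketch; the `/tts` path and port 9880 are the project's commonly documented defaults, treated here as assumptions:

```python
import requests

# "top_k" is omitted from the body, so the server-side default of 15 applies.
resp = requests.post(
    "http://127.0.0.1:9880/tts",  # assumed host/port
    json={
        "text": "Hello, world.",
        "text_lang": "en",
        "ref_audio_path": "ref.wav",
        "prompt_lang": "en",
        "text_split_method": "cut5",
    },
)
with open("output.wav", "wb") as f:
    f.write(resp.content)
```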

@@ -19,7 +19,7 @@ torchaudio
 modelscope
 sentencepiece
 transformers>=4.43,<=4.50
-peft
+peft<0.18.0
 chardet
 PyYAML
 psutil