Compare commits


18 Commits

Author SHA1 Message Date
Karasukaigan
e78aada32e
Merge 539ad38d310d452aa1a08e659d5eac22f843a01d into c767f0b83b998e996a4d230d86da575a03f54a3f 2026-01-05 00:00:20 -08:00
ChasonJiang
c767f0b83b
Fix bugs (#2704)
* Fix bugs

* Fallback and bug fix
2025-12-30 16:00:21 +08:00
ChasonJiang
9080a967d5
Fix sampling error (#2703) 2025-12-30 15:21:03 +08:00
sushistack
51df9f7384
Fix model file name in README instructions (#2700) 2025-12-25 16:44:21 +08:00
ChasonJiang
bfca0f6b2d
Align naive_infer's decoding strategy to prevent dropped sentences (#2697) 2025-12-19 17:37:19 +08:00
ChasonJiang
abe984395c
Align the default GPT top_k sampling parameter (#2696) 2025-12-19 16:05:36 +08:00
RVC-Boss
cc89c3660e
Update requirements.txt 2025-12-19 15:54:54 +08:00
Karasukaigan
539ad38d31
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-07-02 15:35:45 +08:00
Karasukaigan
6df7921d56
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-06-20 00:30:43 +08:00
Karasukaigan
8d212a9255
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-06-06 19:43:11 +08:00
Karasukaigan
2fb92e74a2 Update the simplified WebUI to support V2Pro
Updated webui_simple.py to support the V2Pro series of models.
2025-06-06 01:20:45 +08:00
Karasukaigan
be16f387f1
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-06-05 18:41:42 +08:00
Karasukaigan
7e6a607b9e
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-06-05 15:47:39 +08:00
Karasukaigan
ab5e8dc0ae
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-05-31 20:15:19 +08:00
Karasukaigan
b666becb19 Revert the changes to inference_webui 2025-05-27 22:41:20 +08:00
Karasukaigan
47426d18e7 Add a simplified fine-tuning WebUI
Users no longer need to switch between multiple tabs to complete a fine-tuning run. All fine-tuning steps are now laid out on a single page, ordered from top to bottom, with rarely used settings hidden.
2025-05-27 22:22:15 +08:00
Karasukaigan
b8de0ec0ac
Merge branch 'RVC-Boss:main' into feat/frontend-usability-enhancements 2025-05-27 22:11:36 +08:00
Karasukaigan
f20f17c2c0 Fix a parameter type error when calling the speech synthesis endpoint via the Gradio API
Fixes a parameter type error that occurred when calling the speech synthesis endpoint `/get_tts_wav` via the Gradio API.

## Error message
TypeError: unsupported operand type(s) for /: 'int' and 'str'

## Cause
`get_tts_wav` in `inference_webui.py` does not validate the type of the incoming `sample_steps` value. Because Gradio treats the value coming from `gr.Radio` as a string when it auto-generates the API documentation, users who follow the "Use via API" instructions below the WebUI and call `/get_tts_wav` accordingly end up passing the wrong type, which triggers the error above.

## Fix
Convert `sample_steps` to a consistent type (int) at the beginning of `get_tts_wav`, resolving the parameter type error.
2025-05-16 17:58:56 +08:00
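
For readers following along, here is a minimal, self-contained sketch of the kind of normalization the commit describes. It is not the literal patch; the helper name and the default value of 32 are made up for illustration, and in the real fix the coercion sits at the top of `get_tts_wav` in `inference_webui.py`.

```python
# Hypothetical illustration of the described fix, not the actual patch.
def normalize_sample_steps(sample_steps, default=32):
    """Coerce sample_steps to int, since Gradio API clients may send it as a string."""
    try:
        return int(sample_steps)
    except (TypeError, ValueError):
        return default

print(normalize_sample_steps("32"))  # -> 32 (int), the case that used to raise TypeError
print(normalize_sample_steps(8))     # -> 8, ints pass through unchanged
```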
8 changed files with 2097 additions and 10 deletions

View File

@@ -707,10 +707,12 @@ class Text2SemanticDecoder(nn.Module):
             if idx == 0:
                 attn_mask = F.pad(attn_mask[:, :, -1].unsqueeze(-2), (0, 1), value=False)
                 logits = logits[:, :-1]
             else:
                 attn_mask = F.pad(attn_mask, (0, 1), value=False)
+                if idx < 11:  ### require at least 10 predicted tokens before allowing a stop (~0.4 s)
+                    logits = logits[:, :-1]
             samples = sample(
                 logits, y, top_k=top_k, top_p=top_p, repetition_penalty=repetition_penalty, temperature=temperature
             )[0]
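
Context for the two added lines: the last logit column is treated here as the stop (EOS) token, so slicing it off while `idx < 11` means sampling cannot pick it before at least 10 semantic tokens (roughly 0.4 s of audio) have been produced, which is what prevents prematurely truncated sentences. A standalone sketch of that idea, with a made-up vocabulary size:

```python
import torch

MIN_TOKENS = 11  # mirrors the `idx < 11` guard in the diff above

def mask_early_stop(logits: torch.Tensor, idx: int) -> torch.Tensor:
    """Drop the last (assumed EOS) logit column for early steps so sampling cannot stop yet."""
    if idx < MIN_TOKENS:
        return logits[:, :-1]
    return logits

step_logits = torch.randn(1, 1025)                  # hypothetical: 1024 tokens + EOS
print(mask_early_stop(step_logits, idx=3).shape)    # torch.Size([1, 1024]) -> EOS unreachable
print(mask_early_stop(step_logits, idx=20).shape)   # torch.Size([1, 1025]) -> EOS allowed
```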

View File

@@ -1008,7 +1008,7 @@ class TTS:
 "aux_ref_audio_paths": [], # list.(optional) auxiliary reference audio paths for multi-speaker tone fusion
 "prompt_text": "", # str.(optional) prompt text for the reference audio
 "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
-"top_k": 5, # int. top k sampling
+"top_k": 15, # int. top k sampling
 "top_p": 1, # float. top p sampling
 "temperature": 1, # float. temperature for sampling
 "text_split_method": "cut1", # str. text split method, see text_segmentation_method.py for details.
@@ -1039,7 +1039,7 @@
 aux_ref_audio_paths: list = inputs.get("aux_ref_audio_paths", [])
 prompt_text: str = inputs.get("prompt_text", "")
 prompt_lang: str = inputs.get("prompt_lang", "")
-top_k: int = inputs.get("top_k", 5)
+top_k: int = inputs.get("top_k", 15)
 top_p: float = inputs.get("top_p", 1)
 temperature: float = inputs.get("temperature", 1)
 text_split_method: str = inputs.get("text_split_method", "cut1")

View File

@@ -385,7 +385,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css
     minimum=0.6, maximum=1.65, step=0.05, label="语速", value=1.0, interactive=True
 )
 with gr.Row():
-    top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n("top_k"), value=5, interactive=True)
+    top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n("top_k"), value=15, interactive=True)
     top_p = gr.Slider(minimum=0, maximum=1, step=0.05, label=i18n("top_p"), value=1, interactive=True)
 with gr.Row():
     temperature = gr.Slider(

View File

@@ -347,7 +347,7 @@ Use v4 from v1/v2/v3 environment:
 2. Clone the latest codes from github.
-3. Download v4 pretrained models (gsv-v4-pretrained/s2v4.ckpt, and gsv-v4-pretrained/vocoder.pth) from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) and put them into `GPT_SoVITS/pretrained_models`.
+3. Download v4 pretrained models (gsv-v4-pretrained/s2v4.pth, and gsv-v4-pretrained/vocoder.pth) from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) and put them into `GPT_SoVITS/pretrained_models`.
 ## V2Pro Release Notes

View File

@@ -27,7 +27,7 @@ POST:
 "aux_ref_audio_paths": [], # list.(optional) auxiliary reference audio paths for multi-speaker tone fusion
 "prompt_text": "", # str.(optional) prompt text for the reference audio
 "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
-"top_k": 5, # int. top k sampling
+"top_k": 15, # int. top k sampling
 "top_p": 1, # float. top p sampling
 "temperature": 1, # float. temperature for sampling
 "text_split_method": "cut5", # str. text split method, see text_segmentation_method.py for details.
@@ -158,7 +158,7 @@ class TTS_Request(BaseModel):
 aux_ref_audio_paths: list = None
 prompt_lang: str = None
 prompt_text: str = ""
-top_k: int = 5
+top_k: int = 15
 top_p: float = 1
 temperature: float = 1
 text_split_method: str = "cut5"
@@ -355,7 +355,7 @@ async def tts_handle(req: dict):
 "aux_ref_audio_paths": [], # list.(optional) auxiliary reference audio paths for multi-speaker tone fusion
 "prompt_text": "", # str.(optional) prompt text for the reference audio
 "prompt_lang": "", # str.(required) language of the prompt text for the reference audio
-"top_k": 5, # int. top k sampling
+"top_k": 15, # int. top k sampling
 "top_p": 1, # float. top p sampling
 "temperature": 1, # float. temperature for sampling
 "text_split_method": "cut5", # str. text split method, see text_segmentation_method.py for details.
@@ -460,7 +460,7 @@ async def tts_get_endpoint(
 aux_ref_audio_paths: list = None,
 prompt_lang: str = None,
 prompt_text: str = "",
-top_k: int = 5,
+top_k: int = 15,
 top_p: float = 1,
 temperature: float = 1,
 text_split_method: str = "cut5",
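
As a usage illustration of the new default, here is a hedged example of calling the `/tts` endpoint served by api_v2.py. The host and port assume the upstream default of 127.0.0.1:9880, the reference-audio path is a placeholder, and the required fields `text`, `text_lang`, and `ref_audio_path` come from the full request schema rather than the excerpt above.

```python
import requests

# Assumes a locally running api_v2.py server; adjust host, port, and paths to your setup.
payload = {
    "text": "Hello there.",               # text to synthesize
    "text_lang": "en",
    "ref_audio_path": "ref/example.wav",  # placeholder reference audio
    "prompt_text": "",
    "prompt_lang": "en",
    "top_k": 15,                          # new default shown in the diff
    "top_p": 1,
    "temperature": 1,
    "text_split_method": "cut5",
}
resp = requests.post("http://127.0.0.1:9880/tts", json=payload)
resp.raise_for_status()
with open("output.wav", "wb") as f:
    f.write(resp.content)                 # the endpoint returns audio bytes on success
```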

go-webui-simple-mode.bat (new file, 2 additions)
View File

@@ -0,0 +1,2 @@
+runtime\python.exe -I webui_simple.py zh_CN
+pause
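
The batch file simply launches the simplified WebUI with the bundled Windows runtime. On a setup without that runtime, the presumable equivalent is running `python webui_simple.py zh_CN` from the repository root with the project's requirements installed; `zh_CN` selects the interface language.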

View File

@@ -19,7 +19,7 @@ torchaudio
 modelscope
 sentencepiece
 transformers>=4.43,<=4.50
-peft
+peft<0.18.0
 chardet
 PyYAML
 psutil

webui_simple.py (new file, 2083 additions)

File diff suppressed because it is too large