From 39e77a61aca7285315f6a070979c001e66b43fc3 Mon Sep 17 00:00:00 2001 From: samiabat Date: Wed, 9 Jul 2025 02:18:57 +0300 Subject: [PATCH] add allc --- GPT_SoVITS/inference_cli.py | 6 +- GPT_SoVITS/inference_webui.py | 4 +- api.py | 246 +--------------------------------- 3 files changed, 9 insertions(+), 247 deletions(-) diff --git a/GPT_SoVITS/inference_cli.py b/GPT_SoVITS/inference_cli.py index 43004a21..05ae4e34 100644 --- a/GPT_SoVITS/inference_cli.py +++ b/GPT_SoVITS/inference_cli.py @@ -3,7 +3,7 @@ import os import soundfile as sf from tools.i18n.i18n import I18nAuto -from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav +from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav, test i18n = I18nAuto() @@ -11,9 +11,13 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_ energy_scale=1.0, volume_scale=1.0, strain_effect=0.0): # Change model weights + print("Changing model weights about to be called...") + test() change_gpt_weights(gpt_path=GPT_model_path) change_sovits_weights(sovits_path=SoVITS_model_path) + print("Model weights changed successfully.") + # Synthesize audio synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path, prompt_text=ref_text, diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 3ccf45ab..d62a6764 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -41,6 +41,8 @@ for i in range(3): if os.path.exists(pretrained_sovits_name[i]):_[-1].append(pretrained_sovits_name[i]) pretrained_gpt_name,pretrained_sovits_name = _ +def test(): + print("GPT_SoVITS/inference_cli.py is running") if os.path.exists(f"./weight.json"): pass @@ -327,7 +329,7 @@ def change_gpt_weights(gpt_path): change_gpt_weights(gpt_path) -os.environ["HF_ENDPOINT"] = "https://hf-mirror.com" +os.environ["HF_ENDPOINT"] = "https://hf-mirror.com" import torch,soundfile now_dir = os.getcwd() import soundfile diff --git a/api.py b/api.py index cf930a19..de7f04ed 100644 --- a/api.py +++ b/api.py @@ -1,146 +1,3 @@ -""" -# api.py usage - -` python api.py -dr "123.wav" -dt "一二三。" -dl "zh" ` - -## 执行参数: - -`-s` - `SoVITS模型路径, 可在 config.py 中指定` -`-g` - `GPT模型路径, 可在 config.py 中指定` - -调用请求缺少参考音频时使用 -`-dr` - `默认参考音频路径` -`-dt` - `默认参考音频文本` -`-dl` - `默认参考音频语种, "中文","英文","日文","韩文","粤语,"zh","en","ja","ko","yue"` - -`-d` - `推理设备, "cuda","cpu"` -`-a` - `绑定地址, 默认"127.0.0.1"` -`-p` - `绑定端口, 默认9880, 可在 config.py 中指定` -`-fp` - `覆盖 config.py 使用全精度` -`-hp` - `覆盖 config.py 使用半精度` -`-sm` - `流式返回模式, 默认不启用, "close","c", "normal","n", "keepalive","k"` -·-mt` - `返回的音频编码格式, 流式默认ogg, 非流式默认wav, "wav", "ogg", "aac"` -·-st` - `返回的音频数据类型, 默认int16, "int16", "int32"` -·-cp` - `文本切分符号设定, 默认为空, 以",.,。"字符串的方式传入` - -`-hb` - `cnhubert路径` -`-b` - `bert路径` - -## 调用: - -### 推理 - -endpoint: `/` - -使用执行参数指定的参考音频: -GET: - `http://127.0.0.1:9880?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh` -POST: -```json -{ - "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。", - "text_language": "zh" -} -``` - -使用执行参数指定的参考音频并设定分割符号: -GET: - `http://127.0.0.1:9880?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh&cut_punc=,。` -POST: -```json -{ - "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。", - "text_language": "zh", - "cut_punc": ",。", -} -``` - -手动指定当次推理所使用的参考音频: -GET: - `http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh&text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh` -POST: -```json -{ - "refer_wav_path": "123.wav", - "prompt_text": "一二三。", - "prompt_language": "zh", - "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。", - "text_language": "zh" -} -``` - -RESP: -成功: 直接返回 wav 音频流, http code 200 -失败: 返回包含错误信息的 json, http code 400 - -手动指定当次推理所使用的参考音频,并提供参数: -GET: - `http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh&text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_language=zh&top_k=20&top_p=0.6&temperature=0.6&speed=1&inp_refs="456.wav"&inp_refs="789.wav"` -POST: -```json -{ - "refer_wav_path": "123.wav", - "prompt_text": "一二三。", - "prompt_language": "zh", - "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。", - "text_language": "zh", - "top_k": 20, - "top_p": 0.6, - "temperature": 0.6, - "speed": 1, - "inp_refs": ["456.wav","789.wav"] -} -``` - -RESP: -成功: 直接返回 wav 音频流, http code 200 -失败: 返回包含错误信息的 json, http code 400 - - -### 更换默认参考音频 - -endpoint: `/change_refer` - -key与推理端一样 - -GET: - `http://127.0.0.1:9880/change_refer?refer_wav_path=123.wav&prompt_text=一二三。&prompt_language=zh` -POST: -```json -{ - "refer_wav_path": "123.wav", - "prompt_text": "一二三。", - "prompt_language": "zh" -} -``` - -RESP: -成功: json, http code 200 -失败: json, 400 - - -### 命令控制 - -endpoint: `/control` - -command: -"restart": 重新运行 -"exit": 结束运行 - -GET: - `http://127.0.0.1:9880/control?command=restart` -POST: -```json -{ - "command": "restart" -} -``` - -RESP: 无 - -""" - - import argparse import os,re import sys @@ -279,7 +136,7 @@ def get_sovits_weights(sovits_path): is_exist_s2gv3=os.path.exists(path_sovits_v3) version, model_version, if_lora_v3=get_sovits_version_from_path_fast(sovits_path) - logger.info(f"the version of version: {version}, model version: {model_version}, if lora v3: {if_lora_v3}") + logger.info(f"the version is: {version}, model version: {model_version}, if lora v3: {if_lora_v3}") if if_lora_v3==True and is_exist_s2gv3==False: logger.info("SoVITS V3 底模缺失,无法加载相应 LoRA 权重") @@ -1021,53 +878,6 @@ change_gpt_sovits_weights(gpt_path = gpt_path, sovits_path = sovits_path) # -------------------------------- app = FastAPI() -@app.post("/set_model") -async def set_model(request: Request): - json_post_raw = await request.json() - return change_gpt_sovits_weights( - gpt_path = json_post_raw.get("gpt_model_path"), - sovits_path = json_post_raw.get("sovits_model_path") - ) - - -@app.get("/set_model") -async def set_model( - gpt_model_path: str = None, - sovits_model_path: str = None, -): - return change_gpt_sovits_weights(gpt_path = gpt_model_path, sovits_path = sovits_model_path) - - -@app.post("/control") -async def control(request: Request): - json_post_raw = await request.json() - return handle_control(json_post_raw.get("command")) - - -@app.get("/control") -async def control(command: str = None): - return handle_control(command) - - -@app.post("/change_refer") -async def change_refer(request: Request): - json_post_raw = await request.json() - return handle_change( - json_post_raw.get("refer_wav_path"), - json_post_raw.get("prompt_text"), - json_post_raw.get("prompt_language") - ) - - -@app.get("/change_refer") -async def change_refer( - refer_wav_path: str = None, - prompt_text: str = None, - prompt_language: str = None -): - return handle_change(refer_wav_path, prompt_text, prompt_language) - - @app.post("/") async def tts_endpoint(request: Request): json_post_raw = await request.json() @@ -1092,60 +902,6 @@ import soundfile as sf import io from fastapi.responses import StreamingResponse -@app.get("/version-4") -async def version_4( - GPT_model_path = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt", - SoVITS_model_path = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth", - ref_text: str = "おはよう〜。今日はどんな1日過ごすー?くらりはね〜いつでもあなたの味方だよ", - ref_language: str = "日文", - target_text: str = None, - text_language: str = "日文", - output_path: str = None, - character_name: str = "Kurari", - model_id: int = 14, -): - # Create a temporary buffer to store the audio - audio_buffer = io.BytesIO() - # GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path - # Synthesize audio and get the result - - path = "idols/kurari/kurari.wav" - if character_name == "saotome": - path = "idols/saotome/saotome.wav" - GPT_model_path = "GPT_SoVITS/pretrained_models/saotome-e30.ckpt" - SoVITS_model_path = "GPT_SoVITS/pretrained_models/saotome_e9_s522_l32.pth" - ref_text = "今日は友達と一緒に映画を見に行く予定ですが、天気が悪くて少し心配です。" - ref_language = "日文" - - synthesis_result = synthesize( - GPT_model_path = GPT_model_path, - SoVITS_model_path = SoVITS_model_path, - ref_audio_path = path, - ref_text = ref_text, - ref_language = ref_language, - target_text = target_text, - text_language = text_language, - output_path = output_path # Don't save to file - ) - - # Get the last audio data and sample rate from synthesis result - result_list = list(synthesis_result) - if result_list: - last_sampling_rate, last_audio_data = result_list[-1] - - # Write audio data to buffer - # sf.write(audio_buffer, last_audio_data, last_sampling_rate, ) - sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="wav") - audio_buffer.seek(0) - - # Return audio as streaming response - return StreamingResponse( - audio_buffer, - media_type="audio/wav", - headers={"Content-Disposition": "attachment; filename=output.wav"} - ) - - return JSONResponse({"error": "Failed to generate audio"}, status_code=400) def version_4_cli( GPT_model_path = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",