This commit is contained in:
samiabat 2025-07-09 02:18:57 +03:00
parent 49058c1f2a
commit 39e77a61ac
3 changed files with 9 additions and 247 deletions

View File

@ -3,7 +3,7 @@ import os
import soundfile as sf
from tools.i18n.i18n import I18nAuto
from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav, test
i18n = I18nAuto()
@ -11,9 +11,13 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_
energy_scale=1.0, volume_scale=1.0, strain_effect=0.0):
# Change model weights
print("Changing model weights about to be called...")
test()
change_gpt_weights(gpt_path=GPT_model_path)
change_sovits_weights(sovits_path=SoVITS_model_path)
print("Model weights changed successfully.")
# Synthesize audio
synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path,
prompt_text=ref_text,

View File

@ -41,6 +41,8 @@ for i in range(3):
if os.path.exists(pretrained_sovits_name[i]):_[-1].append(pretrained_sovits_name[i])
pretrained_gpt_name,pretrained_sovits_name = _
def test():
    """Debug probe: prints a banner confirming this module's code executed."""
    banner = "GPT_SoVITS/inference_cli.py is running"
    print(banner)
# NOTE(review): weight.json's existence is checked but the branch is a no-op;
# presumably a leftover guard from removed loading logic — confirm intent.
if os.path.exists(f"./weight.json"):
    pass
@ -327,7 +329,7 @@ def change_gpt_weights(gpt_path):
change_gpt_weights(gpt_path)
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
import torch,soundfile
now_dir = os.getcwd()
import soundfile

246
api.py
View File

@ -1,146 +1,3 @@
"""
# api.py usage
` python api.py -dr "123.wav" -dt "一二三。" -dl "zh" `
## 执行参数:
`-s` - `SoVITS模型路径, 可在 config.py 中指定`
`-g` - `GPT模型路径, 可在 config.py 中指定`
调用请求缺少参考音频时使用
`-dr` - `默认参考音频路径`
`-dt` - `默认参考音频文本`
`-dl` - `默认参考音频语种, "中文","英文","日文","韩文","粤语","zh","en","ja","ko","yue"`
`-d` - `推理设备, "cuda","cpu"`
`-a` - `绑定地址, 默认"127.0.0.1"`
`-p` - `绑定端口, 默认9880, 可在 config.py 中指定`
`-fp` - `覆盖 config.py 使用全精度`
`-hp` - `覆盖 config.py 使用半精度`
`-sm` - `流式返回模式, 默认不启用, "close","c", "normal","n", "keepalive","k"`
`-mt` - `返回的音频编码格式, 流式默认ogg, 非流式默认wav, "wav", "ogg", "aac"`
`-st` - `返回的音频数据类型, 默认int16, "int16", "int32"`
`-cp` - `文本切分符号设定, 默认为空, ",.,。"字符串的方式传入`
`-hb` - `cnhubert路径`
`-b` - `bert路径`
## 调用:
### 推理
endpoint: `/`
使用执行参数指定的参考音频:
GET:
`http://127.0.0.1:9880?text=先帝创业未半而中道崩殂今天下三分益州疲弊此诚危急存亡之秋也&text_language=zh`
POST:
```json
{
"text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
"text_language": "zh"
}
```
使用执行参数指定的参考音频并设定分割符号:
GET:
`http://127.0.0.1:9880?text=先帝创业未半而中道崩殂今天下三分益州疲弊此诚危急存亡之秋也&text_language=zh&cut_punc=`
POST:
```json
{
"text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
"text_language": "zh",
"cut_punc": ",。",
}
```
手动指定当次推理所使用的参考音频:
GET:
`http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三&prompt_language=zh&text=先帝创业未半而中道崩殂今天下三分益州疲弊此诚危急存亡之秋也&text_language=zh`
POST:
```json
{
"refer_wav_path": "123.wav",
"prompt_text": "一二三。",
"prompt_language": "zh",
"text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
"text_language": "zh"
}
```
RESP:
成功: 直接返回 wav 音频流 http code 200
失败: 返回包含错误信息的 json, http code 400
手动指定当次推理所使用的参考音频并提供参数:
GET:
`http://127.0.0.1:9880?refer_wav_path=123.wav&prompt_text=一二三&prompt_language=zh&text=先帝创业未半而中道崩殂今天下三分益州疲弊此诚危急存亡之秋也&text_language=zh&top_k=20&top_p=0.6&temperature=0.6&speed=1&inp_refs="456.wav"&inp_refs="789.wav"`
POST:
```json
{
"refer_wav_path": "123.wav",
"prompt_text": "一二三。",
"prompt_language": "zh",
"text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
"text_language": "zh",
"top_k": 20,
"top_p": 0.6,
"temperature": 0.6,
"speed": 1,
"inp_refs": ["456.wav","789.wav"]
}
```
RESP:
成功: 直接返回 wav 音频流 http code 200
失败: 返回包含错误信息的 json, http code 400
### 更换默认参考音频
endpoint: `/change_refer`
key与推理端一样
GET:
`http://127.0.0.1:9880/change_refer?refer_wav_path=123.wav&prompt_text=一二三&prompt_language=zh`
POST:
```json
{
"refer_wav_path": "123.wav",
"prompt_text": "一二三。",
"prompt_language": "zh"
}
```
RESP:
成功: json, http code 200
失败: json, 400
### 命令控制
endpoint: `/control`
command:
"restart": 重新运行
"exit": 结束运行
GET:
`http://127.0.0.1:9880/control?command=restart`
POST:
```json
{
"command": "restart"
}
```
RESP:
"""
import argparse
import os,re
import sys
@ -279,7 +136,7 @@ def get_sovits_weights(sovits_path):
is_exist_s2gv3=os.path.exists(path_sovits_v3)
version, model_version, if_lora_v3=get_sovits_version_from_path_fast(sovits_path)
logger.info(f"the version of version: {version}, model version: {model_version}, if lora v3: {if_lora_v3}")
logger.info(f"the version is: {version}, model version: {model_version}, if lora v3: {if_lora_v3}")
if if_lora_v3==True and is_exist_s2gv3==False:
logger.info("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
@ -1021,53 +878,6 @@ change_gpt_sovits_weights(gpt_path = gpt_path, sovits_path = sovits_path)
# --------------------------------
app = FastAPI()
@app.post("/set_model")
async def set_model(request: Request):
    """Swap in the GPT/SoVITS checkpoints named in the JSON request body."""
    body = await request.json()
    gpt = body.get("gpt_model_path")
    sovits = body.get("sovits_model_path")
    return change_gpt_sovits_weights(gpt_path=gpt, sovits_path=sovits)
@app.get("/set_model")
async def set_model(
    gpt_model_path: str = None,
    sovits_model_path: str = None,
):
    """GET variant of /set_model: checkpoint paths arrive as query params."""
    return change_gpt_sovits_weights(
        gpt_path=gpt_model_path,
        sovits_path=sovits_model_path,
    )
@app.post("/control")
async def control(request: Request):
    """Dispatch a service command (e.g. "restart"/"exit") from a JSON body."""
    body = await request.json()
    cmd = body.get("command")
    return handle_control(cmd)
@app.get("/control")
async def control(command: str = None):
    """GET variant of /control: run the command given as a query parameter."""
    requested = command
    return handle_control(requested)
@app.post("/change_refer")
async def change_refer(request: Request):
    """Replace the default reference audio/text/language from a JSON body."""
    body = await request.json()
    wav = body.get("refer_wav_path")
    text = body.get("prompt_text")
    lang = body.get("prompt_language")
    return handle_change(wav, text, lang)
@app.get("/change_refer")
async def change_refer(
    refer_wav_path: str = None,
    prompt_text: str = None,
    prompt_language: str = None
):
    """GET variant of /change_refer: reference fields arrive as query params."""
    return handle_change(refer_wav_path, prompt_text, prompt_language)
@app.post("/")
async def tts_endpoint(request: Request):
json_post_raw = await request.json()
@ -1092,60 +902,6 @@ import soundfile as sf
import io
from fastapi.responses import StreamingResponse
@app.get("/version-4")
async def version_4(
    GPT_model_path = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",
    SoVITS_model_path = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth",
    ref_text: str = "おはよう〜。今日はどんな1日過ごすーくらりはね〜いつでもあなたの味方だよ",
    ref_language: str = "日文",
    target_text: str = None,
    text_language: str = "日文",
    output_path: str = None,
    character_name: str = "Kurari",
    model_id: int = 14,
):
    """Synthesize speech for *target_text* with a character-specific model and
    stream the result back as a downloadable WAV.

    Defaults select the "Kurari" voice; ``character_name == "saotome"``
    overrides the reference audio path, both model checkpoints, the reference
    text, and the reference language in one shot. Returns HTTP 200 with an
    ``audio/wav`` stream on success, or a JSON error with HTTP 400 when the
    synthesis generator yields nothing.

    NOTE(review): ``model_id`` is accepted but never referenced in this body —
    confirm whether it is dead or consumed elsewhere.
    NOTE(review): ``output_path`` is forwarded to synthesize() despite the
    "Don't save to file" comment — confirm callers always pass None.
    """
    # Create a temporary in-memory buffer to hold the rendered audio.
    audio_buffer = io.BytesIO()
    # GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path
    # Synthesize audio and get the result
    path = "idols/kurari/kurari.wav"
    if character_name == "saotome":
        path = "idols/saotome/saotome.wav"
        GPT_model_path = "GPT_SoVITS/pretrained_models/saotome-e30.ckpt"
        SoVITS_model_path = "GPT_SoVITS/pretrained_models/saotome_e9_s522_l32.pth"
        ref_text = "今日は友達と一緒に映画を見に行く予定ですが、天気が悪くて少し心配です。"
        ref_language = "日文"
    synthesis_result = synthesize(
        GPT_model_path = GPT_model_path,
        SoVITS_model_path = SoVITS_model_path,
        ref_audio_path = path,
        ref_text = ref_text,
        ref_language = ref_language,
        target_text = target_text,
        text_language = text_language,
        output_path = output_path # Don't save to file
    )
    # synthesize() yields (sampling_rate, audio_data) pairs; materialize the
    # generator and keep only the final chunk.
    result_list = list(synthesis_result)
    if result_list:
        last_sampling_rate, last_audio_data = result_list[-1]
        # Write audio data to buffer
        # sf.write(audio_buffer, last_audio_data, last_sampling_rate, )
        sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="wav")
        audio_buffer.seek(0)  # rewind so StreamingResponse reads from the start
        # Return audio as streaming response
        return StreamingResponse(
            audio_buffer,
            media_type="audio/wav",
            headers={"Content-Disposition": "attachment; filename=output.wav"}
        )
    return JSONResponse({"error": "Failed to generate audio"}, status_code=400)
def version_4_cli(
GPT_model_path = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",