add version 4 kurari

This commit is contained in:
samiabat 2025-05-22 15:33:46 +03:00
parent bae41d84dd
commit d2142eef85
2 changed files with 52 additions and 10 deletions

View File

@ -7,15 +7,8 @@ from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights
i18n = I18nAuto()
def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
# Read reference text
with open(ref_text_path, 'r', encoding='utf-8') as file:
ref_text = file.read()
# Read target text
with open(target_text_path, 'r', encoding='utf-8') as file:
target_text = file.read()
def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path):
# Change model weights
change_gpt_weights(gpt_path=GPT_model_path)
change_sovits_weights(sovits_path=SoVITS_model_path)
@ -25,10 +18,12 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
prompt_text=ref_text,
prompt_language=i18n(ref_language),
text=target_text,
text_language=i18n(target_language), top_p=1, temperature=1)
text_language=i18n(text_language), top_p=1, temperature=1)
result_list = list(synthesis_result)
return result_list
if result_list:
last_sampling_rate, last_audio_data = result_list[-1]
output_wav_path = os.path.join(output_path, "output.wav")

47
api.py
View File

@ -1087,6 +1087,53 @@ async def tts_endpoint(request: Request):
json_post_raw.get("if_sr", False)
)
from GPT_SoVITS.inference_cli import synthesize
import soundfile as sf
import io
from fastapi.responses import StreamingResponse
@app.get("/version-4")
async def version_4(
    GPT_model_path: str = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",
    SoVITS_model_path: str = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth",
    ref_text: str = "おはよう〜。今日はどんな1日過ごすーくらりはね〜いつでもあなたの味方だよ",
    ref_language: str = "ja",
    target_text: str = None,
    text_language: str = "ja",
    output_path: str = None,
):
    """Synthesize ``target_text`` with the Kurari voice and return it as WAV.

    Query parameters:
        GPT_model_path: Path to the GPT weights passed to ``synthesize``.
        SoVITS_model_path: Path to the SoVITS weights passed to ``synthesize``.
        ref_text: Transcript of the reference audio clip.
        ref_language: Language code of ``ref_text``.
        target_text: Text to speak. Required; a 400 is returned when missing
            or empty.
        text_language: Language code of ``target_text``.
        output_path: Forwarded to ``synthesize`` unchanged; this endpoint
            itself never writes to disk.

    Returns:
        A ``StreamingResponse`` with ``audio/wav`` content built from the last
        ``(sampling_rate, audio_data)`` pair produced by ``synthesize``, or a
        ``JSONResponse`` with status 400 when there is nothing to return.
    """
    # NOTE(review): the model paths come straight from the query string, so
    # callers can point the loader at arbitrary files — confirm this endpoint
    # is only reachable by trusted clients.

    # Reject a missing/empty text up front instead of handing None to the
    # synthesizer.
    if not target_text:
        return JSONResponse({"error": "target_text is required"}, status_code=400)

    # NOTE: synthesize() is called synchronously, so this coroutine does not
    # yield to the event loop until synthesis has finished.
    synthesis_result = synthesize(
        GPT_model_path=GPT_model_path,
        SoVITS_model_path=SoVITS_model_path,
        ref_audio_path="idols/kurari/kurari.wav",
        ref_text=ref_text,
        ref_language=ref_language,
        target_text=target_text,
        # The keyword must be ``text_language`` to match synthesize()'s
        # signature; ``target_language`` is not an accepted parameter.
        text_language=text_language,
        output_path=output_path,
    )

    # Tolerate a None return as well as an empty sequence.
    result_list = list(synthesis_result or [])
    if not result_list:
        return JSONResponse({"error": "Failed to generate audio"}, status_code=400)

    # Only the final (sampling_rate, audio_data) pair is sent to the client.
    last_sampling_rate, last_audio_data = result_list[-1]

    # An in-memory buffer has no file extension, so soundfile cannot infer the
    # container format; it has to be given explicitly.
    audio_buffer = io.BytesIO()
    sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="WAV")
    audio_buffer.seek(0)

    return StreamingResponse(
        audio_buffer,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=output.wav"},
    )
@app.get("/")
async def tts_endpoint(