add version 4 kurari

2025-09-08 18:28:27 +08:00 · 2025-05-22 15:33:46 +03:00 · 2025-05-22 15:33:46 +03:00 · d2142eef85
commit d2142eef85
parent bae41d84dd
2 changed files with 52 additions and 10 deletions
--- a/GPT_SoVITS/inference_cli.py
+++ b/GPT_SoVITS/inference_cli.py
@ -7,14 +7,7 @@ from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights
 i18n = I18nAuto()
-def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
+def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path):
    # Read reference text
    with open(ref_text_path, 'r', encoding='utf-8') as file:
        ref_text = file.read()
    # Read target text
    with open(target_text_path, 'r', encoding='utf-8') as file:
        target_text = file.read()
    # Change model weights
    change_gpt_weights(gpt_path=GPT_model_path)
@ -25,10 +18,12 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
                                   prompt_text=ref_text, 
                                   prompt_language=i18n(ref_language), 
                                   text=target_text, 
-                                   text_language=i18n(target_language), top_p=1, temperature=1)
+                                   text_language=i18n(text_language), top_p=1, temperature=1)
    result_list = list(synthesis_result)
    return result_list
    if result_list:
        last_sampling_rate, last_audio_data = result_list[-1]
        output_wav_path = os.path.join(output_path, "output.wav")
--- a/api.py
+++ b/api.py
@ -1087,6 +1087,53 @@ async def tts_endpoint(request: Request):
        json_post_raw.get("if_sr", False) 
    )
 from GPT_SoVITS.inference_cli import synthesize
 import soundfile as sf
 import io
 from fastapi.responses import StreamingResponse
@app.get("/version-4")
async def version_4(
    GPT_model_path: str = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",
    SoVITS_model_path: str = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth",
    ref_text: str = "おはよう〜。今日はどんな1日過ごすー？くらりはね〜いつでもあなたの味方だよ",
    ref_language: str = "ja",
    target_text: str = None,
    text_language: str = "ja",
    output_path: str = None,
):
    """Synthesize ``target_text`` with the kurari voice and return it as WAV.

    Args:
        GPT_model_path: Path of the GPT checkpoint handed to ``synthesize``.
        SoVITS_model_path: Path of the SoVITS checkpoint handed to ``synthesize``.
        ref_text: Transcript of the fixed reference clip
            ``idols/kurari/kurari.wav``.
        ref_language: Language code of ``ref_text``.
        target_text: Text to speak. Required; a missing or empty value
            yields a 400 response.
        text_language: Language code of ``target_text``.
        output_path: Forwarded unchanged to ``synthesize``.
            NOTE(review): the endpoint itself never writes to disk; whether
            ``synthesize`` uses this value should be confirmed there.

    Returns:
        A ``StreamingResponse`` carrying the last synthesized segment as
        ``audio/wav``, or a ``JSONResponse`` with status 400 when the input
        is missing or no audio was produced.
    """
    # Reject requests without text up front instead of failing inside the model.
    if not target_text:
        return JSONResponse({"error": "target_text is required"}, status_code=400)

    # Keyword names must match synthesize()'s signature, which calls the
    # target-language parameter ``text_language`` (not ``target_language``).
    synthesis_result = synthesize(
        GPT_model_path=GPT_model_path,
        SoVITS_model_path=SoVITS_model_path,
        ref_audio_path="idols/kurari/kurari.wav",
        ref_text=ref_text,
        ref_language=ref_language,
        target_text=target_text,
        text_language=text_language,
        output_path=output_path,
    )

    # Each result item is unpacked as (sampling_rate, audio_data); only the
    # last one is returned to the client.
    result_list = list(synthesis_result)
    if not result_list:
        return JSONResponse({"error": "Failed to generate audio"}, status_code=400)

    last_sampling_rate, last_audio_data = result_list[-1]

    # An in-memory buffer has no file extension, so the container format
    # has to be given explicitly for soundfile to encode it.
    audio_buffer = io.BytesIO()
    sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="WAV")
    audio_buffer.seek(0)

    return StreamingResponse(
        audio_buffer,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=output.wav"},
    )
@app.get("/")
 async def tts_endpoint(