add version 4 kurari

2025-12-17 10:27:08 +08:00 · 2025-05-22 15:33:46 +03:00 · 2025-05-22 15:33:46 +03:00 · d2142eef85
commit d2142eef85
parent bae41d84dd
2 changed files with 52 additions and 10 deletions
--- a/GPT_SoVITS/inference_cli.py
+++ b/GPT_SoVITS/inference_cli.py
@ -7,15 +7,8 @@ from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights

 i18n = I18nAuto()

-def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
-    # Read reference text
-    with open(ref_text_path, 'r', encoding='utf-8') as file:
-        ref_text = file.read()
-
-    # Read target text
-    with open(target_text_path, 'r', encoding='utf-8') as file:
-        target_text = file.read()
-
+def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path):
+    
    # Change model weights
    change_gpt_weights(gpt_path=GPT_model_path)
    change_sovits_weights(sovits_path=SoVITS_model_path)
@ -25,10 +18,12 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
                                   prompt_text=ref_text, 
                                   prompt_language=i18n(ref_language), 
                                   text=target_text, 
-                                   text_language=i18n(target_language), top_p=1, temperature=1)
+                                   text_language=i18n(text_language), top_p=1, temperature=1)
    
    result_list = list(synthesis_result)

+    return result_list
+
    if result_list:
        last_sampling_rate, last_audio_data = result_list[-1]
        output_wav_path = os.path.join(output_path, "output.wav")
--- a/api.py
+++ b/api.py
@ -1087,6 +1087,53 @@ async def tts_endpoint(request: Request):
        json_post_raw.get("if_sr", False) 
    )

+from GPT_SoVITS.inference_cli import synthesize
+import soundfile as sf
+import io
+from fastapi.responses import StreamingResponse
+
+@app.get("/version-4")
+async def version_4(
+    GPT_model_path = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",
+    SoVITS_model_path = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth",
+    ref_text: str = "おはよう〜。今日はどんな1日過ごすー？くらりはね〜いつでもあなたの味方だよ",
+    ref_language: str = "ja",
+    target_text: str = None,
+    text_language: str = "ja",
+    output_path: str = None
+):
+    """Synthesize ``target_text`` with the kurari models and return it as a WAV download.
+
+    All parameters are forwarded to ``synthesize``; the reference audio is fixed to
+    ``idols/kurari/kurari.wav``. Responds with HTTP 400 and a JSON error body when
+    ``synthesize`` yields no audio.
+    """
+    synthesis_result = synthesize(
+        GPT_model_path = GPT_model_path,
+        SoVITS_model_path = SoVITS_model_path,
+        ref_audio_path = "idols/kurari/kurari.wav",
+        ref_text = ref_text,
+        ref_language = ref_language,
+        target_text = target_text,
+        # synthesize() names this parameter text_language, not target_language.
+        text_language = text_language,
+        output_path = output_path
+    )
+    result_list = list(synthesis_result)
+    if result_list:
+        # Only the final (sampling_rate, audio_data) pair is returned to the client.
+        last_sampling_rate, last_audio_data = result_list[-1]
+        # A BytesIO has no file extension, so soundfile needs the format spelled out.
+        audio_buffer = io.BytesIO()
+        sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="WAV")
+        audio_buffer.seek(0)
+        return StreamingResponse(
+            audio_buffer,
+            media_type="audio/wav",
+            headers={"Content-Disposition": "attachment; filename=output.wav"}
+        )
+
+    return JSONResponse({"error": "Failed to generate audio"}, status_code=400)

@app.get("/")
 async def tts_endpoint(