From d2142eef85b553bcfb7c325b4f4ae8c3251ea534 Mon Sep 17 00:00:00 2001
From: samiabat <samuelabatneh20@gmail.com>
Date: Thu, 22 May 2025 15:33:46 +0300
Subject: [PATCH] add version 4 kurari

---
 GPT_SoVITS/inference_cli.py | 15 ++++--------
 api.py                      | 47 +++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/GPT_SoVITS/inference_cli.py b/GPT_SoVITS/inference_cli.py
index bd987aaf..6a57ca2a 100644
--- a/GPT_SoVITS/inference_cli.py
+++ b/GPT_SoVITS/inference_cli.py
@@ -7,15 +7,8 @@ from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights
 
 i18n = I18nAuto()
 
-def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
-    # Read reference text
-    with open(ref_text_path, 'r', encoding='utf-8') as file:
-        ref_text = file.read()
-
-    # Read target text
-    with open(target_text_path, 'r', encoding='utf-8') as file:
-        target_text = file.read()
-
+def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path):
+    
     # Change model weights
     change_gpt_weights(gpt_path=GPT_model_path)
     change_sovits_weights(sovits_path=SoVITS_model_path)
@@ -25,10 +18,12 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
                                    prompt_text=ref_text, 
                                    prompt_language=i18n(ref_language), 
                                    text=target_text, 
-                                   text_language=i18n(target_language), top_p=1, temperature=1)
+                                   text_language=i18n(text_language), top_p=1, temperature=1)
     
     result_list = list(synthesis_result)
 
+    return result_list
+
     if result_list:
         last_sampling_rate, last_audio_data = result_list[-1]
         output_wav_path = os.path.join(output_path, "output.wav")
diff --git a/api.py b/api.py
index 7721a663..045c1825 100644
--- a/api.py
+++ b/api.py
@@ -1087,6 +1087,53 @@ async def tts_endpoint(request: Request):
         json_post_raw.get("if_sr", False) 
     )
 
+from GPT_SoVITS.inference_cli import synthesize
+import soundfile as sf
+import io
+from fastapi.responses import StreamingResponse
+
+@app.get("/version-4")
+async def version_4(
+    GPT_model_path = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",
+    SoVITS_model_path = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth",
+    ref_text: str = "おはよう〜。今日はどんな1日過ごすー？くらりはね〜いつでもあなたの味方だよ",
+    ref_language: str = "ja",
+    target_text: str = None,
+    text_language: str = "ja",
+    output_path: str = None
+):
+    """Synthesize ``target_text`` against the kurari reference audio and return it as a WAV download.
+
+    Responds with HTTP 400 and a JSON error body when synthesis yields no audio.
+    """
+    # Keyword names must match synthesize()'s signature (text_language, not target_language).
+    synthesis_result = synthesize(
+        GPT_model_path = GPT_model_path,
+        SoVITS_model_path = SoVITS_model_path,
+        ref_audio_path = "idols/kurari/kurari.wav",
+        ref_text = ref_text,
+        ref_language = ref_language,
+        target_text = target_text,
+        text_language = text_language,
+        output_path = output_path  # synthesize() returns before its file-saving code, so this is unused
+    )
+
+    # Only the last (sampling_rate, audio_data) pair of the result is sent back.
+    result_list = list(synthesis_result)
+    if result_list:
+        last_sampling_rate, last_audio_data = result_list[-1]
+        # soundfile cannot infer a container format from a file-like object, so name it explicitly.
+        audio_buffer = io.BytesIO()
+        sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="WAV")
+        audio_buffer.seek(0)
+
+        return StreamingResponse(
+            audio_buffer,
+            media_type="audio/wav",
+            headers={"Content-Disposition": "attachment; filename=output.wav"}
+        )
+
+    return JSONResponse({"error": "Failed to generate audio"}, status_code=400)
 
 @app.get("/")
 async def tts_endpoint(