mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-09-07 00:29:48 +08:00
add version 4 kurari
This commit is contained in:
parent
bae41d84dd
commit
d2142eef85
@ -7,15 +7,8 @@ from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights
|
||||
|
||||
i18n = I18nAuto()
|
||||
|
||||
def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
|
||||
# Read reference text
|
||||
with open(ref_text_path, 'r', encoding='utf-8') as file:
|
||||
ref_text = file.read()
|
||||
|
||||
# Read target text
|
||||
with open(target_text_path, 'r', encoding='utf-8') as file:
|
||||
target_text = file.read()
|
||||
|
||||
def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text, ref_language, target_text, text_language, output_path):
|
||||
|
||||
# Change model weights
|
||||
change_gpt_weights(gpt_path=GPT_model_path)
|
||||
change_sovits_weights(sovits_path=SoVITS_model_path)
|
||||
@ -25,10 +18,12 @@ def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path,
|
||||
prompt_text=ref_text,
|
||||
prompt_language=i18n(ref_language),
|
||||
text=target_text,
|
||||
text_language=i18n(target_language), top_p=1, temperature=1)
|
||||
text_language=i18n(text_language), top_p=1, temperature=1)
|
||||
|
||||
result_list = list(synthesis_result)
|
||||
|
||||
return result_list
|
||||
|
||||
if result_list:
|
||||
last_sampling_rate, last_audio_data = result_list[-1]
|
||||
output_wav_path = os.path.join(output_path, "output.wav")
|
||||
|
47
api.py
47
api.py
@ -1087,6 +1087,53 @@ async def tts_endpoint(request: Request):
|
||||
json_post_raw.get("if_sr", False)
|
||||
)
|
||||
|
||||
from GPT_SoVITS.inference_cli import synthesize
|
||||
import soundfile as sf
|
||||
import io
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
@app.get("/version-4")
async def version_4(
    GPT_model_path: str = "GPT_SoVITS/pretrained_models/kurari-e40.ckpt",
    SoVITS_model_path: str = "GPT_SoVITS/pretrained_models/kurari_e20_s1800_l32.pth",
    ref_text: str = "おはよう〜。今日はどんな1日過ごすー?くらりはね〜いつでもあなたの味方だよ",
    ref_language: str = "ja",
    target_text: str = None,
    text_language: str = "ja",
    output_path: str = None,
):
    """Synthesize ``target_text`` with the "kurari" voice and return it as WAV.

    Query parameters:
        GPT_model_path: Path of the GPT checkpoint handed to ``synthesize``.
        SoVITS_model_path: Path of the SoVITS weights handed to ``synthesize``.
        ref_text: Transcript of the fixed reference audio clip.
        ref_language: Language code of ``ref_text``.
        target_text: Text to speak. Required; a missing or empty value is
            rejected with HTTP 400 instead of being passed to the model.
        text_language: Language code of ``target_text``.
        output_path: Forwarded unchanged to ``synthesize``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` built from the
        last ``(sampling_rate, audio_data)`` item produced by ``synthesize``,
        or a ``JSONResponse`` with status 400 when there is nothing to return.
    """
    # Fail fast with a clear client error rather than feeding None/"" into
    # the synthesis pipeline.
    if not target_text:
        return JSONResponse({"error": "target_text is required"}, status_code=400)

    # NOTE(review): synthesize() is a synchronous call made from an async
    # handler; it presumably blocks the event loop while it runs -- confirm
    # whether it should be moved to a worker thread.
    synthesis_result = synthesize(
        GPT_model_path=GPT_model_path,
        SoVITS_model_path=SoVITS_model_path,
        ref_audio_path="idols/kurari/kurari.wav",
        ref_text=ref_text,
        ref_language=ref_language,
        target_text=target_text,
        # synthesize() names this parameter ``text_language``; passing it as
        # ``target_language`` raises TypeError (unexpected keyword argument).
        text_language=text_language,
        output_path=output_path,
    )

    # Only the final item of the synthesis output is returned to the client.
    result_list = list(synthesis_result or [])
    if not result_list:
        return JSONResponse({"error": "Failed to generate audio"}, status_code=400)

    last_sampling_rate, last_audio_data = result_list[-1]

    # An in-memory buffer has no file extension, so soundfile cannot infer
    # the container type; the format has to be given explicitly.
    audio_buffer = io.BytesIO()
    sf.write(audio_buffer, last_audio_data, last_sampling_rate, format="WAV")
    audio_buffer.seek(0)

    return StreamingResponse(
        audio_buffer,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=output.wav"},
    )
|
||||
|
||||
@app.get("/")
|
||||
async def tts_endpoint(
|
||||
|
Loading…
x
Reference in New Issue
Block a user