mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-06-04 05:01:27 +08:00
流式API
This commit is contained in:
parent
bed73febf3
commit
0d405f2d2a
@ -1,6 +1,3 @@
|
||||
GPT_SoVITS/pretrained_models/*
|
||||
tools/asr/models/*
|
||||
tools/uvr5/uvr5_weights/*
|
||||
|
||||
.git
|
||||
.DS_Store
|
||||
@ -11,10 +8,7 @@ runtime
|
||||
.idea
|
||||
output
|
||||
logs
|
||||
SoVITS_weights*/
|
||||
GPT_weights*/
|
||||
TEMP
|
||||
weight.json
|
||||
ffmpeg*
|
||||
ffprobe*
|
||||
cfg.json
|
||||
|
||||
@ -18,7 +18,7 @@ ln -s /workspace/models/pretrained_models /workspace/GPT-SoVITS/GPT_SoVITS/pretr
|
||||
|
||||
ln -s /workspace/models/G2PWModel /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel
|
||||
|
||||
bash install.sh --device "CU${CUDA_VERSION//./}" --source HF
|
||||
bash install.sh --device "MPS" --source HF
|
||||
|
||||
pip cache purge
|
||||
|
||||
|
||||
70
Dockerfile
70
Dockerfile
@ -1,62 +1,20 @@
|
||||
ARG CUDA_VERSION=12.6
|
||||
ARG TORCH_BASE=full
|
||||
FROM python:3.10.18-bullseye
|
||||
|
||||
FROM xxxxrt666/torch-base:cu${CUDA_VERSION}-${TORCH_BASE}
|
||||
|
||||
LABEL maintainer="XXXXRT"
|
||||
LABEL version="V4"
|
||||
LABEL version="V2pro"
|
||||
LABEL description="Docker image for GPT-SoVITS"
|
||||
|
||||
ARG CUDA_VERSION=12.6
|
||||
WORKDIR /GPT-SoVITS
|
||||
COPY requirements.txt /GPT-SoVITS
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
ENV CUDA_VERSION=${CUDA_VERSION}
|
||||
COPY GPT_SoVITS /GPT-SoVITS/GPT_SoVITS
|
||||
COPY tools /GPT-SoVITS/tools
|
||||
COPY api.py /GPT-SoVITS
|
||||
COPY api_v2.py /GPT-SoVITS
|
||||
COPY config.py /GPT-SoVITS
|
||||
COPY webui.py /GPT-SoVITS
|
||||
COPY ref_audio /GPT-SoVITS/ref_audio
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
EXPOSE 9871 9872 9873 9874 9880 8001 8002
|
||||
|
||||
WORKDIR /workspace/GPT-SoVITS
|
||||
|
||||
COPY Docker /workspace/GPT-SoVITS/Docker/
|
||||
|
||||
ARG LITE=false
|
||||
ENV LITE=${LITE}
|
||||
|
||||
ARG WORKFLOW=false
|
||||
ENV WORKFLOW=${WORKFLOW}
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
ENV TARGETPLATFORM=${TARGETPLATFORM}
|
||||
|
||||
RUN bash Docker/miniconda_install.sh
|
||||
|
||||
COPY extra-req.txt /workspace/GPT-SoVITS/
|
||||
|
||||
COPY requirements.txt /workspace/GPT-SoVITS/
|
||||
|
||||
COPY install.sh /workspace/GPT-SoVITS/
|
||||
|
||||
RUN bash Docker/install_wrapper.sh
|
||||
|
||||
EXPOSE 9871 9872 9873 9874 9880
|
||||
|
||||
ENV PYTHONPATH="/workspace/GPT-SoVITS"
|
||||
|
||||
RUN conda init bash && echo "conda activate base" >> ~/.bashrc
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
RUN rm -rf /workspace/GPT-SoVITS
|
||||
|
||||
WORKDIR /workspace/GPT-SoVITS
|
||||
|
||||
COPY . /workspace/GPT-SoVITS
|
||||
|
||||
CMD ["/bin/bash", "-c", "\
|
||||
rm -rf /workspace/GPT-SoVITS/GPT_SoVITS/pretrained_models && \
|
||||
rm -rf /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel && \
|
||||
rm -rf /workspace/GPT-SoVITS/tools/asr/models && \
|
||||
rm -rf /workspace/GPT-SoVITS/tools/uvr5/uvr5_weights && \
|
||||
ln -s /workspace/models/pretrained_models /workspace/GPT-SoVITS/GPT_SoVITS/pretrained_models && \
|
||||
ln -s /workspace/models/G2PWModel /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel && \
|
||||
ln -s /workspace/models/asr_models /workspace/GPT-SoVITS/tools/asr/models && \
|
||||
ln -s /workspace/models/uvr5_weights /workspace/GPT-SoVITS/tools/uvr5/uvr5_weights && \
|
||||
exec bash"]
|
||||
CMD ["/bin/bash", "-c", "python GPT_SoVITS/inference_webui_api.py"]
|
||||
@ -3,9 +3,9 @@ custom:
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
device: cpu
|
||||
is_half: false
|
||||
t2s_weights_path: GPT_weights_v2ProPlus/111-e15.ckpt
|
||||
version: v2Pro
|
||||
vits_weights_path: SoVITS_weights_v2ProPlus/111_e8_s136.pth
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/meiv2pp-e15.ckpt
|
||||
version: v2
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/meiv2pp_e8_s232.pth
|
||||
v1:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
|
||||
@ -525,29 +525,30 @@ import tempfile
|
||||
import shutil
|
||||
import os
|
||||
from pydantic import BaseModel
|
||||
import soundfile as sf
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class InferenceRequest(BaseModel):
|
||||
text: str
|
||||
text_lang: str = "中文"
|
||||
text_lang: str = i18n("中文")
|
||||
ref_audio: str # 这里是base64编码的音频文件内容
|
||||
prompt_text: str = ""
|
||||
prompt_lang: str = "中文"
|
||||
top_k: int = 5
|
||||
top_p: float = 1
|
||||
temperature: float = 1
|
||||
text_split_method: str = "按标点符号切"
|
||||
prompt_text: str
|
||||
prompt_lang: str = i18n("中文")
|
||||
top_k: int = 6
|
||||
top_p: float = 0.9
|
||||
temperature: float = 0.95
|
||||
text_split_method: str = i18n("按标点符号切")
|
||||
batch_size: int = 20
|
||||
speed_factor: float = 1.1
|
||||
ref_text_free: bool = True
|
||||
ref_text_free: bool = False
|
||||
split_bucket: bool = True
|
||||
fragment_interval: float = 0.3
|
||||
seed: int = -1
|
||||
keep_random: bool = True
|
||||
parallel_infer: bool = True
|
||||
repetition_penalty: float = 1.35
|
||||
repetition_penalty: float = 1.45
|
||||
sample_steps: int = 32
|
||||
super_sampling: bool = False
|
||||
|
||||
@ -632,24 +633,21 @@ def wav_chunk_streamer(infer_gen):
|
||||
wav_file.writeframes(audio.tobytes())
|
||||
return buffer.getvalue()
|
||||
|
||||
for wav_data, _ in infer_gen:
|
||||
sr, audio = wav_data
|
||||
if not isinstance(audio, np.ndarray):
|
||||
audio = np.array(audio)
|
||||
if audio.dtype != np.int16:
|
||||
audio = (audio * 32768).astype(np.int16)
|
||||
yield encode_wav_chunk(sr, audio) # 每段 WAV 数据
|
||||
for audio, _ in infer_gen:
|
||||
audio_data = audio[0] if isinstance(audio[0], np.ndarray) else audio[1]
|
||||
yield encode_wav_chunk(32000, audio_data) # 每段 WAV 数据
|
||||
|
||||
|
||||
@app.post("/tts_stream")
|
||||
async def api_inference(req: InferenceRequest):
|
||||
try:
|
||||
infer_gen = inference(
|
||||
text=req.text,
|
||||
text_lang=req.text_lang,
|
||||
text_lang=i18n(req.text_lang),
|
||||
ref_audio_path=req.ref_audio,
|
||||
aux_ref_audio_paths=[],
|
||||
prompt_text=req.prompt_text,
|
||||
prompt_lang=req.prompt_lang,
|
||||
prompt_lang=i18n(req.prompt_lang),
|
||||
top_k=req.top_k,
|
||||
top_p=req.top_p,
|
||||
temperature=req.temperature,
|
||||
@ -683,4 +681,5 @@ async def api_inference(req: InferenceRequest):
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=8001)
|
||||
port = int(os.environ.get("PORT", 8001)) # 默认端口8001
|
||||
uvicorn.run(app, host="0.0.0.0", port=port)
|
||||
@ -31,6 +31,11 @@ import torch
|
||||
|
||||
import logging
|
||||
import time
|
||||
import numpy
|
||||
|
||||
# 在文件开头添加输出目录配置
|
||||
output_dir = os.environ.get("output_dir", "outputs")
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
@ -206,8 +211,19 @@ def inference(
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
for item in tts_pipeline.run(inputs):
|
||||
yield item, actual_seed
|
||||
for audio in tts_pipeline.run(inputs):
|
||||
if isinstance(audio, tuple):
|
||||
# 保存到本地
|
||||
output_filename = f"tts_{int(time.time())}.wav"
|
||||
output_path = os.path.join(output_dir, output_filename)
|
||||
audio_data = audio[0] if isinstance(audio[0], numpy.ndarray) else audio[1]
|
||||
import soundfile as sf
|
||||
sf.write(output_path, audio_data, 32000)
|
||||
logging.info(f"音频已保存至: {output_path}")
|
||||
# 返回原始音频数据给 Gradio
|
||||
yield audio, actual_seed
|
||||
else:
|
||||
yield audio, actual_seed
|
||||
|
||||
logging.info(
|
||||
f"TTS请求耗时: {time.time() - start_time:.3f}s | 文本: {text}"
|
||||
|
||||
BIN
GPT_SoVITS/text/ja_userdic/user.dict
Normal file
BIN
GPT_SoVITS/text/ja_userdic/user.dict
Normal file
Binary file not shown.
1
GPT_SoVITS/text/ja_userdic/userdict.md5
Normal file
1
GPT_SoVITS/text/ja_userdic/userdict.md5
Normal file
@ -0,0 +1 @@
|
||||
d36bd5ffba62f195d22bf4f1a41cd08f
|
||||
BIN
ref_audio/1.wav
Normal file
BIN
ref_audio/1.wav
Normal file
Binary file not shown.
BIN
ref_audio/2.wav
Normal file
BIN
ref_audio/2.wav
Normal file
Binary file not shown.
BIN
ref_audio/3.wav
Normal file
BIN
ref_audio/3.wav
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user