mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-06-04 05:01:27 +08:00
流式API
This commit is contained in:
parent
bed73febf3
commit
0d405f2d2a
@ -1,6 +1,3 @@
|
|||||||
GPT_SoVITS/pretrained_models/*
|
|
||||||
tools/asr/models/*
|
|
||||||
tools/uvr5/uvr5_weights/*
|
|
||||||
|
|
||||||
.git
|
.git
|
||||||
.DS_Store
|
.DS_Store
|
||||||
@ -11,10 +8,7 @@ runtime
|
|||||||
.idea
|
.idea
|
||||||
output
|
output
|
||||||
logs
|
logs
|
||||||
SoVITS_weights*/
|
|
||||||
GPT_weights*/
|
|
||||||
TEMP
|
TEMP
|
||||||
weight.json
|
|
||||||
ffmpeg*
|
ffmpeg*
|
||||||
ffprobe*
|
ffprobe*
|
||||||
cfg.json
|
cfg.json
|
||||||
|
|||||||
@ -18,7 +18,7 @@ ln -s /workspace/models/pretrained_models /workspace/GPT-SoVITS/GPT_SoVITS/pretr
|
|||||||
|
|
||||||
ln -s /workspace/models/G2PWModel /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel
|
ln -s /workspace/models/G2PWModel /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel
|
||||||
|
|
||||||
bash install.sh --device "CU${CUDA_VERSION//./}" --source HF
|
bash install.sh --device "MPS" --source HF
|
||||||
|
|
||||||
pip cache purge
|
pip cache purge
|
||||||
|
|
||||||
|
|||||||
70
Dockerfile
70
Dockerfile
@ -1,62 +1,20 @@
|
|||||||
ARG CUDA_VERSION=12.6
|
FROM python:3.10.18-bullseye
|
||||||
ARG TORCH_BASE=full
|
|
||||||
|
|
||||||
FROM xxxxrt666/torch-base:cu${CUDA_VERSION}-${TORCH_BASE}
|
LABEL version="V2pro"
|
||||||
|
|
||||||
LABEL maintainer="XXXXRT"
|
|
||||||
LABEL version="V4"
|
|
||||||
LABEL description="Docker image for GPT-SoVITS"
|
LABEL description="Docker image for GPT-SoVITS"
|
||||||
|
|
||||||
ARG CUDA_VERSION=12.6
|
WORKDIR /GPT-SoVITS
|
||||||
|
COPY requirements.txt /GPT-SoVITS
|
||||||
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
ENV CUDA_VERSION=${CUDA_VERSION}
|
COPY GPT_SoVITS /GPT-SoVITS/GPT_SoVITS
|
||||||
|
COPY tools /GPT-SoVITS/tools
|
||||||
|
COPY api.py /GPT-SoVITS
|
||||||
|
COPY api_v2.py /GPT-SoVITS
|
||||||
|
COPY config.py /GPT-SoVITS
|
||||||
|
COPY webui.py /GPT-SoVITS
|
||||||
|
COPY ref_audio /GPT-SoVITS/ref_audio
|
||||||
|
|
||||||
SHELL ["/bin/bash", "-c"]
|
EXPOSE 9871 9872 9873 9874 9880 8001 8002
|
||||||
|
|
||||||
WORKDIR /workspace/GPT-SoVITS
|
CMD ["/bin/bash", "-c", "python GPT_SoVITS/inference_webui_api.py"]
|
||||||
|
|
||||||
COPY Docker /workspace/GPT-SoVITS/Docker/
|
|
||||||
|
|
||||||
ARG LITE=false
|
|
||||||
ENV LITE=${LITE}
|
|
||||||
|
|
||||||
ARG WORKFLOW=false
|
|
||||||
ENV WORKFLOW=${WORKFLOW}
|
|
||||||
|
|
||||||
ARG TARGETPLATFORM
|
|
||||||
ENV TARGETPLATFORM=${TARGETPLATFORM}
|
|
||||||
|
|
||||||
RUN bash Docker/miniconda_install.sh
|
|
||||||
|
|
||||||
COPY extra-req.txt /workspace/GPT-SoVITS/
|
|
||||||
|
|
||||||
COPY requirements.txt /workspace/GPT-SoVITS/
|
|
||||||
|
|
||||||
COPY install.sh /workspace/GPT-SoVITS/
|
|
||||||
|
|
||||||
RUN bash Docker/install_wrapper.sh
|
|
||||||
|
|
||||||
EXPOSE 9871 9872 9873 9874 9880
|
|
||||||
|
|
||||||
ENV PYTHONPATH="/workspace/GPT-SoVITS"
|
|
||||||
|
|
||||||
RUN conda init bash && echo "conda activate base" >> ~/.bashrc
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
|
|
||||||
RUN rm -rf /workspace/GPT-SoVITS
|
|
||||||
|
|
||||||
WORKDIR /workspace/GPT-SoVITS
|
|
||||||
|
|
||||||
COPY . /workspace/GPT-SoVITS
|
|
||||||
|
|
||||||
CMD ["/bin/bash", "-c", "\
|
|
||||||
rm -rf /workspace/GPT-SoVITS/GPT_SoVITS/pretrained_models && \
|
|
||||||
rm -rf /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel && \
|
|
||||||
rm -rf /workspace/GPT-SoVITS/tools/asr/models && \
|
|
||||||
rm -rf /workspace/GPT-SoVITS/tools/uvr5/uvr5_weights && \
|
|
||||||
ln -s /workspace/models/pretrained_models /workspace/GPT-SoVITS/GPT_SoVITS/pretrained_models && \
|
|
||||||
ln -s /workspace/models/G2PWModel /workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel && \
|
|
||||||
ln -s /workspace/models/asr_models /workspace/GPT-SoVITS/tools/asr/models && \
|
|
||||||
ln -s /workspace/models/uvr5_weights /workspace/GPT-SoVITS/tools/uvr5/uvr5_weights && \
|
|
||||||
exec bash"]
|
|
||||||
@ -3,9 +3,9 @@ custom:
|
|||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||||
device: cpu
|
device: cpu
|
||||||
is_half: false
|
is_half: false
|
||||||
t2s_weights_path: GPT_weights_v2ProPlus/111-e15.ckpt
|
t2s_weights_path: GPT_SoVITS/pretrained_models/meiv2pp-e15.ckpt
|
||||||
version: v2Pro
|
version: v2
|
||||||
vits_weights_path: SoVITS_weights_v2ProPlus/111_e8_s136.pth
|
vits_weights_path: GPT_SoVITS/pretrained_models/meiv2pp_e8_s232.pth
|
||||||
v1:
|
v1:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||||
|
|||||||
@ -525,29 +525,30 @@ import tempfile
|
|||||||
import shutil
|
import shutil
|
||||||
import os
|
import os
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
class InferenceRequest(BaseModel):
|
class InferenceRequest(BaseModel):
|
||||||
text: str
|
text: str
|
||||||
text_lang: str = "中文"
|
text_lang: str = i18n("中文")
|
||||||
ref_audio: str # 这里是base64编码的音频文件内容
|
ref_audio: str # 这里是base64编码的音频文件内容
|
||||||
prompt_text: str = ""
|
prompt_text: str
|
||||||
prompt_lang: str = "中文"
|
prompt_lang: str = i18n("中文")
|
||||||
top_k: int = 5
|
top_k: int = 6
|
||||||
top_p: float = 1
|
top_p: float = 0.9
|
||||||
temperature: float = 1
|
temperature: float = 0.95
|
||||||
text_split_method: str = "按标点符号切"
|
text_split_method: str = i18n("按标点符号切")
|
||||||
batch_size: int = 20
|
batch_size: int = 20
|
||||||
speed_factor: float = 1.1
|
speed_factor: float = 1.1
|
||||||
ref_text_free: bool = True
|
ref_text_free: bool = False
|
||||||
split_bucket: bool = True
|
split_bucket: bool = True
|
||||||
fragment_interval: float = 0.3
|
fragment_interval: float = 0.3
|
||||||
seed: int = -1
|
seed: int = -1
|
||||||
keep_random: bool = True
|
keep_random: bool = True
|
||||||
parallel_infer: bool = True
|
parallel_infer: bool = True
|
||||||
repetition_penalty: float = 1.35
|
repetition_penalty: float = 1.45
|
||||||
sample_steps: int = 32
|
sample_steps: int = 32
|
||||||
super_sampling: bool = False
|
super_sampling: bool = False
|
||||||
|
|
||||||
@ -632,24 +633,21 @@ def wav_chunk_streamer(infer_gen):
|
|||||||
wav_file.writeframes(audio.tobytes())
|
wav_file.writeframes(audio.tobytes())
|
||||||
return buffer.getvalue()
|
return buffer.getvalue()
|
||||||
|
|
||||||
for wav_data, _ in infer_gen:
|
for audio, _ in infer_gen:
|
||||||
sr, audio = wav_data
|
audio_data = audio[0] if isinstance(audio[0], np.ndarray) else audio[1]
|
||||||
if not isinstance(audio, np.ndarray):
|
yield encode_wav_chunk(32000, audio_data) # 每段 WAV 数据
|
||||||
audio = np.array(audio)
|
|
||||||
if audio.dtype != np.int16:
|
|
||||||
audio = (audio * 32768).astype(np.int16)
|
|
||||||
yield encode_wav_chunk(sr, audio) # 每段 WAV 数据
|
|
||||||
|
|
||||||
@app.post("/tts_stream")
|
@app.post("/tts_stream")
|
||||||
async def api_inference(req: InferenceRequest):
|
async def api_inference(req: InferenceRequest):
|
||||||
try:
|
try:
|
||||||
infer_gen = inference(
|
infer_gen = inference(
|
||||||
text=req.text,
|
text=req.text,
|
||||||
text_lang=req.text_lang,
|
text_lang=i18n(req.text_lang),
|
||||||
ref_audio_path=req.ref_audio,
|
ref_audio_path=req.ref_audio,
|
||||||
aux_ref_audio_paths=[],
|
aux_ref_audio_paths=[],
|
||||||
prompt_text=req.prompt_text,
|
prompt_text=req.prompt_text,
|
||||||
prompt_lang=req.prompt_lang,
|
prompt_lang=i18n(req.prompt_lang),
|
||||||
top_k=req.top_k,
|
top_k=req.top_k,
|
||||||
top_p=req.top_p,
|
top_p=req.top_p,
|
||||||
temperature=req.temperature,
|
temperature=req.temperature,
|
||||||
@ -683,4 +681,5 @@ async def api_inference(req: InferenceRequest):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
uvicorn.run(app, host="0.0.0.0", port=8001)
|
port = int(os.environ.get("PORT", 8001)) # 默认端口8001
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=port)
|
||||||
@ -31,6 +31,11 @@ import torch
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
# 在文件开头添加输出目录配置
|
||||||
|
output_dir = os.environ.get("output_dir", "outputs")
|
||||||
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
@ -206,8 +211,19 @@ def inference(
|
|||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
for item in tts_pipeline.run(inputs):
|
for audio in tts_pipeline.run(inputs):
|
||||||
yield item, actual_seed
|
if isinstance(audio, tuple):
|
||||||
|
# 保存到本地
|
||||||
|
output_filename = f"tts_{int(time.time())}.wav"
|
||||||
|
output_path = os.path.join(output_dir, output_filename)
|
||||||
|
audio_data = audio[0] if isinstance(audio[0], numpy.ndarray) else audio[1]
|
||||||
|
import soundfile as sf
|
||||||
|
sf.write(output_path, audio_data, 32000)
|
||||||
|
logging.info(f"音频已保存至: {output_path}")
|
||||||
|
# 返回原始音频数据给 Gradio
|
||||||
|
yield audio, actual_seed
|
||||||
|
else:
|
||||||
|
yield audio, actual_seed
|
||||||
|
|
||||||
logging.info(
|
logging.info(
|
||||||
f"TTS请求耗时: {time.time() - start_time:.3f}s | 文本: {text}"
|
f"TTS请求耗时: {time.time() - start_time:.3f}s | 文本: {text}"
|
||||||
|
|||||||
BIN
GPT_SoVITS/text/ja_userdic/user.dict
Normal file
BIN
GPT_SoVITS/text/ja_userdic/user.dict
Normal file
Binary file not shown.
1
GPT_SoVITS/text/ja_userdic/userdict.md5
Normal file
1
GPT_SoVITS/text/ja_userdic/userdict.md5
Normal file
@ -0,0 +1 @@
|
|||||||
|
d36bd5ffba62f195d22bf4f1a41cd08f
|
||||||
BIN
ref_audio/1.wav
Normal file
BIN
ref_audio/1.wav
Normal file
Binary file not shown.
BIN
ref_audio/2.wav
Normal file
BIN
ref_audio/2.wav
Normal file
Binary file not shown.
BIN
ref_audio/3.wav
Normal file
BIN
ref_audio/3.wav
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user