This commit is contained in:
samiabat 2025-04-16 09:36:27 +03:00
commit 7bf0088b05
9 changed files with 51 additions and 48 deletions

View File

@ -17,7 +17,7 @@ RUN apt-get update && \
# Copy only requirements.txt initially to leverage Docker cache
WORKDIR /workspace
COPY requirements.txt /workspace/
RUN pip install --no-cache-dir -r requirements.txt
RUN CMAKE_POLICY_VERSION_MINIMUM=3.5 pip install --no-cache-dir -r requirements.txt
# Define a build-time argument for image type
ARG IMAGE_TYPE=full
@ -39,4 +39,4 @@ COPY . /workspace
EXPOSE 9871 9872 9873 9874 9880
CMD ["python", "api.py"]
CMD ["python", "api.py"]

13
api.py
View File

@ -173,6 +173,9 @@ import config as global_config
import logging
import subprocess
import nltk
nltk.download('averaged_perceptron_tagger_eng')
class DefaultRefer:
def __init__(self, path, text, language):
@ -1086,9 +1089,10 @@ async def tts_endpoint(request: Request):
@app.get("/")
async def tts_endpoint(
refer_wav_path: str = None,
prompt_text: str = None,
prompt_language: str = None,
refer_wav_path: str = "saotome/saotome-6s.wav",
prompt_text: str = "今日は友達と一緒に映画を見に行く予定ですが、天気が悪くて少し心配です。",
prompt_language: str = "all_ja",
character: str = "saotome",
text: str = None,
text_language: str = None,
cut_punc: str = None,
@ -1096,10 +1100,11 @@ async def tts_endpoint(
top_p: float = 1.0,
temperature: float = 1.0,
speed: float = 1.0,
inp_refs: list = Query(default=[]),
inp_refs: list = Query(default=["saotome/saotome-10s.wav", "saotome/refs/ref1.wav", "saotome/refs/ref2.wav", "saotome/refs/ref3.wav", "saotome/refs/ref4.wav"]),
sample_steps: int = 32,
if_sr: bool = False
):
print(f"the base path is {refer_wav_path}")
return handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc, top_k, top_p, temperature, speed, inp_refs, sample_steps, if_sr)

View File

@ -1,42 +1,40 @@
numpy==1.23.4
scipy
tensorboard
librosa==0.9.2
numba==0.56.4
pytorch-lightning
gradio>=4.0,<=4.24.0
ffmpeg-python
onnxruntime; sys_platform == 'darwin'
onnxruntime-gpu; sys_platform != 'darwin'
tqdm
funasr==1.0.27
torch<2.4
cn2an
pypinyin
pyopenjtalk>=0.3.4
g2p_en
torchaudio
modelscope==1.10.0
sentencepiece
transformers>=4.43
peft
chardet
PyYAML
psutil
jieba_fast
jieba
split-lang
fast_langdetect>=0.3.0
Faster_Whisper
wordsegment
rotary_embedding_torch
ToJyutping
g2pk2
ko_pron
opencc; sys_platform != 'linux'
opencc==1.1.1; sys_platform == 'linux'
python_mecab_ko; sys_platform != 'win32'
fastapi<0.112.2
x_transformers
torchmetrics<=1.5
attrdict
# Pinned dependencies. Platform-specific packages are selected with PEP 508
# environment markers; a trailing "#" comment is ignored by pip and does NOT
# restrict a requirement to a platform.
numpy==1.23.4
scipy==1.13.1
tensorboard==2.19.0
librosa==0.9.2
numba==0.56.4
pytorch-lightning==2.5.1
gradio==4.24.0
ffmpeg-python==0.2.0
# CPU build on macOS, GPU build elsewhere; only one of the two is requested per platform.
onnxruntime==1.19.2; sys_platform == 'darwin'
onnxruntime-gpu==1.19.2; sys_platform != 'darwin'
tqdm==4.67.1
funasr==1.0.27
cn2an==0.5.23
pypinyin==0.53.0
pyopenjtalk==0.4.0
g2p-en==2.1.0
torchaudio==2.6.0
modelscope==1.10.0
sentencepiece==0.2.0
transformers==4.50.0
peft==0.15.0
chardet==5.2.0
PyYAML==6.0.2
psutil==7.0.0
jieba_fast==0.53
jieba==0.42.1
split-lang==2.1.0
fast-langdetect==0.3.1
faster-whisper==1.1.1
wordsegment==1.3.1
rotary-embedding-torch==0.8.6
ToJyutping==3.2.0
g2pk2==0.0.3
ko-pron==1.3
# The 1.1.1 pin applies to Linux only; other platforms keep the unpinned requirement.
opencc; sys_platform != 'linux'
opencc==1.1.1; sys_platform == 'linux'
# Not requested on Windows.
python-mecab-ko==1.3.7; sys_platform != 'win32'
# Must stay below 0.112.2.
fastapi==0.112.1
x-transformers==2.1.37
torchmetrics==1.5.0
attrdict==2.0.1

BIN
saotome/refs/ref1.wav Normal file

Binary file not shown.

BIN
saotome/refs/ref2.wav Normal file

Binary file not shown.

BIN
saotome/refs/ref3.wav Normal file

Binary file not shown.

BIN
saotome/refs/ref4.wav Normal file

Binary file not shown.

BIN
saotome/saotome-10s.wav Normal file

Binary file not shown.

BIN
saotome/saotome-6s.wav Normal file

Binary file not shown.