mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-08-18 15:59:51 +08:00
turn it into a package
This commit is contained in:
parent
a70e1ad30c
commit
71b2fe69ea
@ -1,60 +1,66 @@
|
||||
from copy import deepcopy
|
||||
import math
|
||||
import os, sys, gc
|
||||
import random
|
||||
import traceback
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
import ffmpeg
|
||||
import os
|
||||
from typing import Generator, List, Tuple, Union
|
||||
import librosa
|
||||
import numpy as np
|
||||
import random
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import traceback
|
||||
import yaml
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
|
||||
from huggingface_hub import snapshot_download, hf_hub_download
|
||||
from importlib.resources import files
|
||||
from time import time as ttime
|
||||
from typing import Generator, List, Tuple, Union
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||
from feature_extractor.cnhubert import CNHubert
|
||||
from module.models import SynthesizerTrn
|
||||
import librosa
|
||||
from time import time as ttime
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
from tools.my_utils import load_audio
|
||||
from module.mel_processing import spectrogram_torch
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
from TTS_infer_pack.text_segmentation_method import splits
|
||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||
|
||||
language=os.environ.get("language","Auto")
|
||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||
i18n = I18nAuto(language=language)
|
||||
LIBRARY_NAME = "GPT_SoVITS"
|
||||
|
||||
# configs/tts_infer.yaml
|
||||
"""
|
||||
custom:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||
device: cpu
|
||||
is_half: false
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
version: v2
|
||||
default:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||
device: cpu
|
||||
is_half: false
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/s2G488k.pth
|
||||
t2s_weights_path: pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
||||
vits_weights_path: pretrained_models/s2G488k.pth
|
||||
version: v1
|
||||
default_v2:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||
device: cpu
|
||||
is_half: false
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
version: v2
|
||||
"""
|
||||
|
||||
@ -86,19 +92,19 @@ class TTS_Config:
|
||||
"device": "cpu",
|
||||
"is_half": False,
|
||||
"version": "v1",
|
||||
"t2s_weights_path": "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt",
|
||||
"vits_weights_path": "GPT_SoVITS/pretrained_models/s2G488k.pth",
|
||||
"cnhuhbert_base_path": "GPT_SoVITS/pretrained_models/chinese-hubert-base",
|
||||
"bert_base_path": "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
|
||||
"t2s_weights_path": "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt",
|
||||
"vits_weights_path": "pretrained_models/s2G488k.pth",
|
||||
"cnhuhbert_base_path": "pretrained_models/chinese-hubert-base",
|
||||
"bert_base_path": "pretrained_models/chinese-roberta-wwm-ext-large",
|
||||
},
|
||||
"default_v2":{
|
||||
"device": "cpu",
|
||||
"is_half": False,
|
||||
"version": "v2",
|
||||
"t2s_weights_path": "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
|
||||
"vits_weights_path": "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
|
||||
"cnhuhbert_base_path": "GPT_SoVITS/pretrained_models/chinese-hubert-base",
|
||||
"bert_base_path": "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
|
||||
"t2s_weights_path": "pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
|
||||
"vits_weights_path": "pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
|
||||
"cnhuhbert_base_path": "pretrained_models/chinese-hubert-base",
|
||||
"bert_base_path": "pretrained_models/chinese-roberta-wwm-ext-large",
|
||||
},
|
||||
}
|
||||
configs:dict = None
|
||||
@ -120,7 +126,7 @@ class TTS_Config:
|
||||
def __init__(self, configs: Union[dict, str]=None):
|
||||
|
||||
# 设置默认配置文件路径
|
||||
configs_base_path:str = "GPT_SoVITS/configs/"
|
||||
configs_base_path:str = "configs/"
|
||||
os.makedirs(configs_base_path, exist_ok=True)
|
||||
self.configs_path:str = os.path.join(configs_base_path, "tts_infer.yaml")
|
||||
|
||||
@ -153,21 +159,22 @@ class TTS_Config:
|
||||
self.cnhuhbert_base_path = self.configs.get("cnhuhbert_base_path", None)
|
||||
self.languages = self.v2_languages if self.version=="v2" else self.v1_languages
|
||||
|
||||
|
||||
if (self.t2s_weights_path in [None, ""]) or (not os.path.exists(self.t2s_weights_path)):
|
||||
self.t2s_weights_path = self.default_configs[default_config_key]['t2s_weights_path']
|
||||
self.t2s_weights_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['t2s_weights_path']))
|
||||
print(f"fall back to default t2s_weights_path: {self.t2s_weights_path}")
|
||||
if (self.vits_weights_path in [None, ""]) or (not os.path.exists(self.vits_weights_path)):
|
||||
self.vits_weights_path = self.default_configs[default_config_key]['vits_weights_path']
|
||||
self.vits_weights_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['vits_weights_path']))
|
||||
print(f"fall back to default vits_weights_path: {self.vits_weights_path}")
|
||||
if (self.bert_base_path in [None, ""]) or (not os.path.exists(self.bert_base_path)):
|
||||
self.bert_base_path = self.default_configs[default_config_key]['bert_base_path']
|
||||
self.bert_base_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['bert_base_path']))
|
||||
print(f"fall back to default bert_base_path: {self.bert_base_path}")
|
||||
if (self.cnhuhbert_base_path in [None, ""]) or (not os.path.exists(self.cnhuhbert_base_path)):
|
||||
self.cnhuhbert_base_path = self.default_configs[default_config_key]['cnhuhbert_base_path']
|
||||
self.cnhuhbert_base_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['cnhuhbert_base_path']))
|
||||
print(f"fall back to default cnhuhbert_base_path: {self.cnhuhbert_base_path}")
|
||||
self.update_configs()
|
||||
|
||||
repo_name="lj1995/GPT-SoVITS"
|
||||
snapshot_download(repo_id=repo_name, local_dir=os.path.dirname(self.bert_base_path))
|
||||
self.update_configs()
|
||||
|
||||
self.max_sec = None
|
||||
self.hz:int = 50
|
||||
|
@ -114,6 +114,8 @@ from fastapi import FastAPI, Request, HTTPException, Response
|
||||
from fastapi.responses import StreamingResponse, JSONResponse
|
||||
from fastapi import FastAPI, UploadFile, File
|
||||
import uvicorn
|
||||
|
||||
from importlib.resources import files
|
||||
from io import BytesIO
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
|
||||
@ -125,7 +127,7 @@ i18n = I18nAuto()
|
||||
cut_method_names = get_cut_method_names()
|
||||
|
||||
parser = argparse.ArgumentParser(description="GPT-SoVITS api")
|
||||
parser.add_argument("-c", "--tts_config", type=str, default="GPT_SoVITS/configs/tts_infer.yaml", help="tts_infer路径")
|
||||
parser.add_argument("-c", "--tts_config", type=str, default=None, help="tts_infer路径")
|
||||
parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1")
|
||||
parser.add_argument("-p", "--port", type=int, default="9880", help="default: 9880")
|
||||
args = parser.parse_args()
|
||||
@ -136,7 +138,7 @@ host = args.bind_addr
|
||||
argv = sys.argv
|
||||
|
||||
if config_path in [None, ""]:
|
||||
config_path = "GPT-SoVITS/configs/tts_infer.yaml"
|
||||
config_path = str(files("GPT_SoVITS").joinpath("configs/tts_infer.yaml"))
|
||||
|
||||
tts_config = TTS_Config(config_path)
|
||||
print(tts_config)
|
||||
@ -394,7 +396,7 @@ async def tts_get_endpoint(
|
||||
|
||||
@APP.post("/tts")
|
||||
async def tts_post_endpoint(request: TTS_Request):
|
||||
req = request.dict()
|
||||
req = request.model_dump()
|
||||
return await tts_handle(req)
|
||||
|
||||
|
||||
@ -449,7 +451,8 @@ async def set_sovits_weights(weights_path: str = None):
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
def main():
|
||||
global port, host, argv
|
||||
try:
|
||||
if host == 'None': # 在调用时使用 -a None 参数,可以让api监听双栈
|
||||
host = None
|
||||
@ -458,3 +461,6 @@ if __name__ == "__main__":
|
||||
traceback.print_exc()
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -1,24 +1,24 @@
|
||||
custom:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||
device: cuda
|
||||
is_half: true
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
version: v2
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
default:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||
device: cpu
|
||||
is_half: false
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
||||
t2s_weights_path: pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
||||
version: v1
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/s2G488k.pth
|
||||
vits_weights_path: pretrained_models/s2G488k.pth
|
||||
default_v2:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||
device: cpu
|
||||
is_half: false
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
version: v2
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
|
1
MANIFEST.in
Normal file
1
MANIFEST.in
Normal file
@ -0,0 +1 @@
|
||||
include GPT_SoVITS/configs/*
|
@ -1,3 +1,6 @@
|
||||
# Jarod's NOTE
|
||||
Working on turning this into a package. Right now, the API *does in fact* work for making requests, and the package can be installed. Will get instructions out sometime in the future.
|
||||
|
||||
<div align="center">
|
||||
|
||||
|
||||
|
63
pyproject.toml
Normal file
63
pyproject.toml
Normal file
@ -0,0 +1,63 @@
|
||||
[build-system]
|
||||
requires = ["setuptools >= 61.0", "setuptools-scm>=8.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "GPT_SoVITS"
|
||||
dynamic = ["version"]
|
||||
description = ""
|
||||
readme = "README.md"
|
||||
license = {text = "MIT License"}
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
]
|
||||
dependencies = [
|
||||
"numpy==1.26.4",
|
||||
"scipy",
|
||||
"tensorboard",
|
||||
"librosa==0.9.2",
|
||||
"numba",
|
||||
"pytorch-lightning",
|
||||
"gradio>=4.0,<=4.24.0",
|
||||
"ffmpeg-python",
|
||||
"onnxruntime; sys_platform == 'darwin'",
|
||||
"onnxruntime-gpu; sys_platform != 'darwin'",
|
||||
"tqdm",
|
||||
"funasr==1.0.27",
|
||||
"cn2an",
|
||||
"pypinyin",
|
||||
"pyopenjtalk>=0.3.4",
|
||||
"g2p_en",
|
||||
"torchaudio",
|
||||
"modelscope==1.10.0",
|
||||
"sentencepiece",
|
||||
"transformers",
|
||||
"chardet",
|
||||
"PyYAML",
|
||||
"psutil",
|
||||
"jieba_fast",
|
||||
"jieba",
|
||||
"LangSegment>=0.2.0",
|
||||
"Faster_Whisper",
|
||||
"wordsegment",
|
||||
"rotary_embedding_torch",
|
||||
"pyjyutping",
|
||||
"g2pk2",
|
||||
"ko_pron",
|
||||
"opencc; sys_platform != 'linux'",
|
||||
"opencc==1.1.1; sys_platform == 'linux'",
|
||||
"python_mecab_ko; sys_platform != 'win32'",
|
||||
"fastapi<0.112.2",
|
||||
]
|
||||
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/RVC-Boss/GPT-SoVITS"
|
||||
|
||||
[project.scripts]
|
||||
"gpt_sovits_api" = "GPT_SoVITS.api_v2:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["GPT_SoVITS"]
|
@ -1,8 +1,8 @@
|
||||
numpy==1.23.4
|
||||
numpy==1.26.4
|
||||
scipy
|
||||
tensorboard
|
||||
librosa==0.9.2
|
||||
numba==0.56.4
|
||||
numba
|
||||
pytorch-lightning
|
||||
gradio>=4.0,<=4.24.0
|
||||
ffmpeg-python
|
||||
|
Loading…
x
Reference in New Issue
Block a user