mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-08-18 15:59:51 +08:00
turn it into a package
This commit is contained in:
parent
a70e1ad30c
commit
71b2fe69ea
@ -1,60 +1,66 @@
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
import math
|
import math
|
||||||
import os, sys, gc
|
import os, sys, gc
|
||||||
import random
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
from tqdm import tqdm
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
import os
|
import librosa
|
||||||
from typing import Generator, List, Tuple, Union
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import random
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
import traceback
|
||||||
import yaml
|
import yaml
|
||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
|
||||||
|
from huggingface_hub import snapshot_download, hf_hub_download
|
||||||
|
from importlib.resources import files
|
||||||
|
from time import time as ttime
|
||||||
|
from typing import Generator, List, Tuple, Union
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
||||||
from feature_extractor.cnhubert import CNHubert
|
from feature_extractor.cnhubert import CNHubert
|
||||||
from module.models import SynthesizerTrn
|
from module.models import SynthesizerTrn
|
||||||
import librosa
|
from module.mel_processing import spectrogram_torch
|
||||||
from time import time as ttime
|
|
||||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||||
from tools.my_utils import load_audio
|
from tools.my_utils import load_audio
|
||||||
from module.mel_processing import spectrogram_torch
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
from TTS_infer_pack.text_segmentation_method import splits
|
from TTS_infer_pack.text_segmentation_method import splits
|
||||||
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
from TTS_infer_pack.TextPreprocessor import TextPreprocessor
|
||||||
|
|
||||||
language=os.environ.get("language","Auto")
|
language=os.environ.get("language","Auto")
|
||||||
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
||||||
i18n = I18nAuto(language=language)
|
i18n = I18nAuto(language=language)
|
||||||
|
LIBRARY_NAME = "GPT_SoVITS"
|
||||||
|
|
||||||
# configs/tts_infer.yaml
|
# configs/tts_infer.yaml
|
||||||
"""
|
"""
|
||||||
custom:
|
custom:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||||
device: cpu
|
device: cpu
|
||||||
is_half: false
|
is_half: false
|
||||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||||
version: v2
|
version: v2
|
||||||
default:
|
default:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||||
device: cpu
|
device: cpu
|
||||||
is_half: false
|
is_half: false
|
||||||
t2s_weights_path: GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
t2s_weights_path: pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
||||||
vits_weights_path: GPT_SoVITS/pretrained_models/s2G488k.pth
|
vits_weights_path: pretrained_models/s2G488k.pth
|
||||||
version: v1
|
version: v1
|
||||||
default_v2:
|
default_v2:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||||
device: cpu
|
device: cpu
|
||||||
is_half: false
|
is_half: false
|
||||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||||
version: v2
|
version: v2
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -86,19 +92,19 @@ class TTS_Config:
|
|||||||
"device": "cpu",
|
"device": "cpu",
|
||||||
"is_half": False,
|
"is_half": False,
|
||||||
"version": "v1",
|
"version": "v1",
|
||||||
"t2s_weights_path": "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt",
|
"t2s_weights_path": "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt",
|
||||||
"vits_weights_path": "GPT_SoVITS/pretrained_models/s2G488k.pth",
|
"vits_weights_path": "pretrained_models/s2G488k.pth",
|
||||||
"cnhuhbert_base_path": "GPT_SoVITS/pretrained_models/chinese-hubert-base",
|
"cnhuhbert_base_path": "pretrained_models/chinese-hubert-base",
|
||||||
"bert_base_path": "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
|
"bert_base_path": "pretrained_models/chinese-roberta-wwm-ext-large",
|
||||||
},
|
},
|
||||||
"default_v2":{
|
"default_v2":{
|
||||||
"device": "cpu",
|
"device": "cpu",
|
||||||
"is_half": False,
|
"is_half": False,
|
||||||
"version": "v2",
|
"version": "v2",
|
||||||
"t2s_weights_path": "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
|
"t2s_weights_path": "pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
|
||||||
"vits_weights_path": "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
|
"vits_weights_path": "pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
|
||||||
"cnhuhbert_base_path": "GPT_SoVITS/pretrained_models/chinese-hubert-base",
|
"cnhuhbert_base_path": "pretrained_models/chinese-hubert-base",
|
||||||
"bert_base_path": "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
|
"bert_base_path": "pretrained_models/chinese-roberta-wwm-ext-large",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
configs:dict = None
|
configs:dict = None
|
||||||
@ -120,7 +126,7 @@ class TTS_Config:
|
|||||||
def __init__(self, configs: Union[dict, str]=None):
|
def __init__(self, configs: Union[dict, str]=None):
|
||||||
|
|
||||||
# 设置默认配置文件路径
|
# 设置默认配置文件路径
|
||||||
configs_base_path:str = "GPT_SoVITS/configs/"
|
configs_base_path:str = "configs/"
|
||||||
os.makedirs(configs_base_path, exist_ok=True)
|
os.makedirs(configs_base_path, exist_ok=True)
|
||||||
self.configs_path:str = os.path.join(configs_base_path, "tts_infer.yaml")
|
self.configs_path:str = os.path.join(configs_base_path, "tts_infer.yaml")
|
||||||
|
|
||||||
@ -153,21 +159,22 @@ class TTS_Config:
|
|||||||
self.cnhuhbert_base_path = self.configs.get("cnhuhbert_base_path", None)
|
self.cnhuhbert_base_path = self.configs.get("cnhuhbert_base_path", None)
|
||||||
self.languages = self.v2_languages if self.version=="v2" else self.v1_languages
|
self.languages = self.v2_languages if self.version=="v2" else self.v1_languages
|
||||||
|
|
||||||
|
|
||||||
if (self.t2s_weights_path in [None, ""]) or (not os.path.exists(self.t2s_weights_path)):
|
if (self.t2s_weights_path in [None, ""]) or (not os.path.exists(self.t2s_weights_path)):
|
||||||
self.t2s_weights_path = self.default_configs[default_config_key]['t2s_weights_path']
|
self.t2s_weights_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['t2s_weights_path']))
|
||||||
print(f"fall back to default t2s_weights_path: {self.t2s_weights_path}")
|
print(f"fall back to default t2s_weights_path: {self.t2s_weights_path}")
|
||||||
if (self.vits_weights_path in [None, ""]) or (not os.path.exists(self.vits_weights_path)):
|
if (self.vits_weights_path in [None, ""]) or (not os.path.exists(self.vits_weights_path)):
|
||||||
self.vits_weights_path = self.default_configs[default_config_key]['vits_weights_path']
|
self.vits_weights_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['vits_weights_path']))
|
||||||
print(f"fall back to default vits_weights_path: {self.vits_weights_path}")
|
print(f"fall back to default vits_weights_path: {self.vits_weights_path}")
|
||||||
if (self.bert_base_path in [None, ""]) or (not os.path.exists(self.bert_base_path)):
|
if (self.bert_base_path in [None, ""]) or (not os.path.exists(self.bert_base_path)):
|
||||||
self.bert_base_path = self.default_configs[default_config_key]['bert_base_path']
|
self.bert_base_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['bert_base_path']))
|
||||||
print(f"fall back to default bert_base_path: {self.bert_base_path}")
|
print(f"fall back to default bert_base_path: {self.bert_base_path}")
|
||||||
if (self.cnhuhbert_base_path in [None, ""]) or (not os.path.exists(self.cnhuhbert_base_path)):
|
if (self.cnhuhbert_base_path in [None, ""]) or (not os.path.exists(self.cnhuhbert_base_path)):
|
||||||
self.cnhuhbert_base_path = self.default_configs[default_config_key]['cnhuhbert_base_path']
|
self.cnhuhbert_base_path = str(files(LIBRARY_NAME).joinpath(self.default_configs[default_config_key]['cnhuhbert_base_path']))
|
||||||
print(f"fall back to default cnhuhbert_base_path: {self.cnhuhbert_base_path}")
|
print(f"fall back to default cnhuhbert_base_path: {self.cnhuhbert_base_path}")
|
||||||
self.update_configs()
|
|
||||||
|
|
||||||
|
repo_name="lj1995/GPT-SoVITS"
|
||||||
|
snapshot_download(repo_id=repo_name, local_dir=os.path.dirname(self.bert_base_path))
|
||||||
|
self.update_configs()
|
||||||
|
|
||||||
self.max_sec = None
|
self.max_sec = None
|
||||||
self.hz:int = 50
|
self.hz:int = 50
|
||||||
|
@ -114,6 +114,8 @@ from fastapi import FastAPI, Request, HTTPException, Response
|
|||||||
from fastapi.responses import StreamingResponse, JSONResponse
|
from fastapi.responses import StreamingResponse, JSONResponse
|
||||||
from fastapi import FastAPI, UploadFile, File
|
from fastapi import FastAPI, UploadFile, File
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
|
from importlib.resources import files
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
|
from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
|
||||||
@ -125,7 +127,7 @@ i18n = I18nAuto()
|
|||||||
cut_method_names = get_cut_method_names()
|
cut_method_names = get_cut_method_names()
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="GPT-SoVITS api")
|
parser = argparse.ArgumentParser(description="GPT-SoVITS api")
|
||||||
parser.add_argument("-c", "--tts_config", type=str, default="GPT_SoVITS/configs/tts_infer.yaml", help="tts_infer路径")
|
parser.add_argument("-c", "--tts_config", type=str, default=None, help="tts_infer路径")
|
||||||
parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1")
|
parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1")
|
||||||
parser.add_argument("-p", "--port", type=int, default="9880", help="default: 9880")
|
parser.add_argument("-p", "--port", type=int, default="9880", help="default: 9880")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -136,7 +138,7 @@ host = args.bind_addr
|
|||||||
argv = sys.argv
|
argv = sys.argv
|
||||||
|
|
||||||
if config_path in [None, ""]:
|
if config_path in [None, ""]:
|
||||||
config_path = "GPT-SoVITS/configs/tts_infer.yaml"
|
config_path = str(files("GPT_SoVITS").joinpath("configs/tts_infer.yaml"))
|
||||||
|
|
||||||
tts_config = TTS_Config(config_path)
|
tts_config = TTS_Config(config_path)
|
||||||
print(tts_config)
|
print(tts_config)
|
||||||
@ -394,7 +396,7 @@ async def tts_get_endpoint(
|
|||||||
|
|
||||||
@APP.post("/tts")
|
@APP.post("/tts")
|
||||||
async def tts_post_endpoint(request: TTS_Request):
|
async def tts_post_endpoint(request: TTS_Request):
|
||||||
req = request.dict()
|
req = request.model_dump()
|
||||||
return await tts_handle(req)
|
return await tts_handle(req)
|
||||||
|
|
||||||
|
|
||||||
@ -449,7 +451,8 @@ async def set_sovits_weights(weights_path: str = None):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def main():
|
||||||
|
global port, host, argv
|
||||||
try:
|
try:
|
||||||
if host == 'None': # 在调用时使用 -a None 参数,可以让api监听双栈
|
if host == 'None': # 在调用时使用 -a None 参数,可以让api监听双栈
|
||||||
host = None
|
host = None
|
||||||
@ -458,3 +461,6 @@ if __name__ == "__main__":
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
os.kill(os.getpid(), signal.SIGTERM)
|
os.kill(os.getpid(), signal.SIGTERM)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -1,24 +1,24 @@
|
|||||||
custom:
|
custom:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||||
device: cuda
|
device: cuda
|
||||||
is_half: true
|
is_half: true
|
||||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||||
version: v2
|
version: v2
|
||||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||||
default:
|
default:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||||
device: cpu
|
device: cpu
|
||||||
is_half: false
|
is_half: false
|
||||||
t2s_weights_path: GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
t2s_weights_path: pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
|
||||||
version: v1
|
version: v1
|
||||||
vits_weights_path: GPT_SoVITS/pretrained_models/s2G488k.pth
|
vits_weights_path: pretrained_models/s2G488k.pth
|
||||||
default_v2:
|
default_v2:
|
||||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
bert_base_path: pretrained_models/chinese-roberta-wwm-ext-large
|
||||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
cnhuhbert_base_path: pretrained_models/chinese-hubert-base
|
||||||
device: cpu
|
device: cpu
|
||||||
is_half: false
|
is_half: false
|
||||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
t2s_weights_path: pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||||
version: v2
|
version: v2
|
||||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
vits_weights_path: pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||||
|
1
MANIFEST.in
Normal file
1
MANIFEST.in
Normal file
@ -0,0 +1 @@
|
|||||||
|
include GPT_SoVITS/configs/*
|
@ -1,3 +1,6 @@
|
|||||||
|
# Jarod's NOTE
|
||||||
|
Working on turning this into a package. Right now, the API *does in fact* work for making requests, and the package can be installed. Will get instructions out sometime in the future.
|
||||||
|
|
||||||
<div align="center">
|
<div align="center">
|
||||||
|
|
||||||
|
|
||||||
|
63
pyproject.toml
Normal file
63
pyproject.toml
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools >= 61.0", "setuptools-scm>=8.0"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "GPT_SoVITS"
|
||||||
|
dynamic = ["version"]
|
||||||
|
description = ""
|
||||||
|
readme = "README.md"
|
||||||
|
license = {text = "MIT License"}
|
||||||
|
classifiers = [
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
]
|
||||||
|
dependencies = [
|
||||||
|
"numpy==1.26.4",
|
||||||
|
"scipy",
|
||||||
|
"tensorboard",
|
||||||
|
"librosa==0.9.2",
|
||||||
|
"numba",
|
||||||
|
"pytorch-lightning",
|
||||||
|
"gradio>=4.0,<=4.24.0",
|
||||||
|
"ffmpeg-python",
|
||||||
|
"onnxruntime; sys_platform == 'darwin'",
|
||||||
|
"onnxruntime-gpu; sys_platform != 'darwin'",
|
||||||
|
"tqdm",
|
||||||
|
"funasr==1.0.27",
|
||||||
|
"cn2an",
|
||||||
|
"pypinyin",
|
||||||
|
"pyopenjtalk>=0.3.4",
|
||||||
|
"g2p_en",
|
||||||
|
"torchaudio",
|
||||||
|
"modelscope==1.10.0",
|
||||||
|
"sentencepiece",
|
||||||
|
"transformers",
|
||||||
|
"chardet",
|
||||||
|
"PyYAML",
|
||||||
|
"psutil",
|
||||||
|
"jieba_fast",
|
||||||
|
"jieba",
|
||||||
|
"LangSegment>=0.2.0",
|
||||||
|
"Faster_Whisper",
|
||||||
|
"wordsegment",
|
||||||
|
"rotary_embedding_torch",
|
||||||
|
"pyjyutping",
|
||||||
|
"g2pk2",
|
||||||
|
"ko_pron",
|
||||||
|
"opencc; sys_platform != 'linux'",
|
||||||
|
"opencc==1.1.1; sys_platform == 'linux'",
|
||||||
|
"python_mecab_ko; sys_platform != 'win32'",
|
||||||
|
"fastapi<0.112.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Homepage = "https://github.com/RVC-Boss/GPT-SoVITS"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
"gpt_sovits_api" = "GPT_SoVITS.api_v2:main"
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
include = ["GPT_SoVITS"]
|
@ -1,8 +1,8 @@
|
|||||||
numpy==1.23.4
|
numpy==1.26.4
|
||||||
scipy
|
scipy
|
||||||
tensorboard
|
tensorboard
|
||||||
librosa==0.9.2
|
librosa==0.9.2
|
||||||
numba==0.56.4
|
numba
|
||||||
pytorch-lightning
|
pytorch-lightning
|
||||||
gradio>=4.0,<=4.24.0
|
gradio>=4.0,<=4.24.0
|
||||||
ffmpeg-python
|
ffmpeg-python
|
||||||
|
Loading…
x
Reference in New Issue
Block a user