Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git, synced 2025-09-29 00:30:15 +08:00.
Commit 26d5eaf1b4 (commit message: "."), parent 9f89f679c1.
.gitignore (vendored): 2 lines changed
@@ -18,7 +18,7 @@ speakers.json
 ref_audios
 tools/AP_BWE/24kto48k/*
 !tools/AP_BWE/24kto48k/readme.txt
-onnx
+onnx_export

 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -11,10 +11,10 @@ import pandas as pd
 import torch
 from torch.utils.data import DataLoader, Dataset

-version = os.environ.get("version", None)
-
 from GPT_SoVITS.text import cleaned_text_to_sequence

+version = os.environ.get("version", None)
+
 # from config import exp_dir

@@ -152,7 +152,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -87,7 +87,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -91,7 +91,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -75,7 +75,7 @@ class T2SEngine(T2SEngineProtocol):
                 transient=True,
             ) as progress,
         ):
-            max_token = min(1800 - int(session.input_pos.max()), 1500)
+            max_token = min(2000 - int(session.input_pos.max()), 1500)

             task = progress.add_task("T2S Decoding", total=max_token)
             for idx in range(1500):
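A note on the two T2SEngine hunks (this one and its torch counterpart below): the decode-step budget is derived from the same constant as the KV-cache capacity, so it has to move in lockstep with the `max_seq_length` bump from 1800 to 2000. A minimal sketch of the budget arithmetic, using a hypothetical prefill length:

```python
# Sketch only: how the decoding budget tracks the enlarged cache.
max_seq_length = 2000  # new KV-cache capacity in this commit
hard_cap = 1500        # fixed upper bound on decode steps
prefill_len = 700      # hypothetical prompt length (session.input_pos.max())

max_token = min(max_seq_length - prefill_len, hard_cap)
assert max_token == 1300  # remaining cache room is the binding limit here
```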
@@ -43,7 +43,7 @@ class SinePositionalEmbedding(nn.Module):
         embedding_dim: int,
         scale: bool = False,
         max_batch_size: int = 10,
-        max_seq_len: int = 1800,
+        max_seq_len: int = 2000,
     ):
         super().__init__()
         self.embedding_dim = embedding_dim
@@ -278,7 +278,7 @@ class AttentionABC(ABC, nn.Module):
     def prefill(self, x: Array, kv_cache: KVCache | KVCacheQ, attn_mask: Array):
         bsz, seqlen, _ = cast(tuple[int, ...], x.shape)

-        q, k, v = self.in_proj(mx.expand_dims(x, 0)).split(3, axis=-1)
+        q, k, v = self.in_proj(x).split(3, axis=-1)

         q, k, v = map(lambda x: x.reshape(bsz, seqlen, self.n_head, self.head_dim), (q, k, v))
@@ -413,7 +413,7 @@ class T2SDecoderABC(nn.Module, T2SDecoderProtocol):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__()
@@ -100,7 +100,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         assert torch.cuda.is_available()
@@ -78,7 +78,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -94,7 +94,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -78,7 +78,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -86,7 +86,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
@@ -57,7 +57,7 @@ class T2SEngine(T2SEngineProtocol):
                 transient=True,
             ) as progress,
         ):
-            max_token = int(min(1800 - session.input_pos.max(), 1500))
+            max_token = int(min(2000 - session.input_pos.max(), 1500))
             task = progress.add_task("T2S Decoding", total=max_token)

             for idx in range(max_token):
@@ -55,7 +55,7 @@ class SinePositionalEmbedding(nn.Module):
         scale: bool = False,
         alpha: bool = False,
         max_batch_size: int = 10,
-        max_seq_len: int = 1800,
+        max_seq_len: int = 2000,
     ):
         super().__init__()
         self.embedding_dim = embedding_dim
@@ -106,8 +106,9 @@ class SinePositionalEmbedding(nn.Module):
             embedded_x (Tensor): [batch_size, seq_len, embed_dim]
         """

-        pe_values = self.pe[:, : x.shape[-2]]
-        return x * self.x_scale + self.alpha.item() * pe_values
+        batch_size = x.shape[0]
+        pe_values = self.pe[:batch_size, : x.shape[-2]]
+        return x * self.x_scale + self.alpha * pe_values


 class KVCacheABC(nn.Module, ABC, KVCacheProtocol):
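For readers skimming the hunk above: the old forward sliced `pe` only along the sequence axis and collapsed `alpha` to a Python float via `.item()`; the fix also slices along the batch axis and keeps `alpha` as a tensor, which preserves autograd and device placement. A runnable sketch of the corrected forward with hypothetical dimensions (the real class fills `pe` with sinusoidal values):

```python
import torch
from torch import nn


class SinePE(nn.Module):
    """Minimal stand-in for the SinePositionalEmbedding forward fix."""

    def __init__(self, embedding_dim=8, max_batch_size=10, max_seq_len=2000):
        super().__init__()
        self.x_scale = 1.0
        self.alpha = nn.Parameter(torch.ones(1))  # kept as a tensor, no .item()
        self.register_buffer("pe", torch.zeros(max_batch_size, max_seq_len, embedding_dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        batch_size = x.shape[0]
        pe_values = self.pe[:batch_size, : x.shape[-2]]  # slice batch AND sequence axes
        return x * self.x_scale + self.alpha * pe_values


out = SinePE()(torch.randn(2, 5, 8))
assert out.shape == (2, 5, 8)
```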
@@ -290,7 +291,7 @@ class AttentionABC(nn.Module, ABC):
     def prefill(self, x: Tensor, kv_cache: KVCacheProtocol, attn_mask: Tensor) -> Tensor:
         bsz, seqlen, _ = x.shape

-        q, k, v = self.in_proj(x.unsqueeze(0)).chunk(3, dim=-1)
+        q, k, v = self.in_proj(x).chunk(3, dim=-1)

         q, k, v = map(lambda x: x.contiguous().view(bsz, seqlen, self.n_head, self.head_dim), (q, k, v))
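The same one-line fix appears in both the MLX and the torch attention code: `x` already carries a batch dimension, so wrapping it in `unsqueeze(0)` (or `mx.expand_dims(x, 0)`) threaded a redundant leading axis through the projection that the later reshape silently folded away. A quick shape check with hypothetical small dimensions:

```python
import torch
from torch import nn

bsz, seqlen, dim, n_head = 2, 5, 8, 2
head_dim = dim // n_head
in_proj = nn.Linear(dim, 3 * dim)
x = torch.randn(bsz, seqlen, dim)

# New path: shapes stay honest end to end.
q, k, v = in_proj(x).chunk(3, dim=-1)            # each [2, 5, 8]
q = q.contiguous().view(bsz, seqlen, n_head, head_dim)

# Old path: a spurious leading axis that view() had to fold back in.
q4 = in_proj(x.unsqueeze(0)).chunk(3, dim=-1)[0]
print(q4.shape)  # torch.Size([1, 2, 5, 8])
```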
@@ -416,7 +417,7 @@ class T2SDecoderABC(nn.Module, ABC, T2SDecoderProtocol):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__()
@@ -60,6 +60,7 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
 logging.getLogger("multipart.multipart").setLevel(logging.ERROR)

 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"


 def set_high_priority():
@@ -90,7 +91,7 @@ def lang_type(text: str) -> str:
 def build_parser() -> argparse.ArgumentParser:
     p = argparse.ArgumentParser(
         prog="inference_webui",
-        description=f"PYTHONPATH=. python -s GPT_SoVITS/inference_webui.py zh_CN -b {backends[-1]}",
+        description=f"python -s -m GPT_SoVITS.inference_webui zh_CN -b {backends[-1]}",
     )
     p.add_argument(
         "language",
@@ -691,6 +692,8 @@ def get_tts_wav(
     pause_second=0.3,
 ):
     torch.set_grad_enabled(False)
+    ttfb_time = ttime()
+
     if ref_wav_path:
         pass
     else:
@@ -918,6 +921,8 @@ def get_tts_wav(
                 with torch.inference_mode():
                     wav_gen = vocoder_model(cfm_res)  # type: ignore
                     audio = wav_gen[0][0]
+                if i_text == 0:
+                    ttfb_time = ttime() - ttfb_time
                 max_audio = torch.abs(audio).max()  # 简单防止16bit爆音
                 if max_audio > 1:
                     audio = audio / max_audio
@@ -954,6 +959,10 @@ def get_tts_wav(
     console.print(f">> Time Stamps: {t0:.3f}\t{t1:.3f}\t{t2:.3f}\t{t3:.3f}")
     console.print(f">> Infer Speed: {infer_speed_avg:.2f} Token/s")
     console.print(f">> RTF: {rtf_value:.2f}")
+    if ttfb_time > 2:
+        console.print(f">> TTFB: {ttfb_time:.3f} s")
+    else:
+        console.print(f">> TTFB: {ttfb_time * 1000:.3f} ms")

     gr.Info(f"{infer_speed_avg:.2f} Token/s", title="Infer Speed")
     gr.Info(f"{rtf_value:.2f}", title="RTF")
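Taken together, the three get_tts_wav hunks add a time-to-first-audio measurement: stamp the clock on entry, convert the stamp into an elapsed duration when the first segment's audio lands, then print seconds or milliseconds depending on magnitude. A condensed sketch of the pattern, assuming `ttime` is `time.time` as elsewhere in the file:

```python
from time import time as ttime

ttfb_time = ttime()  # stamped once when synthesis starts

for i_text in range(3):  # hypothetical per-segment loop
    pass  # ... synthesize segment i_text ...
    if i_text == 0:
        ttfb_time = ttime() - ttfb_time  # variable now holds elapsed seconds

if ttfb_time > 2:
    print(f">> TTFB: {ttfb_time:.3f} s")
else:
    print(f">> TTFB: {ttfb_time * 1000:.3f} ms")
```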
@@ -1,7 +1,6 @@
 import enum
 import os
 import os.path as osp
-import platform
 import queue
 import sys
 import time
@@ -16,7 +15,7 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
 from torch.multiprocessing.spawn import spawn
 from transformers import BertForMaskedLM, BertTokenizerFast

-from GPT_SoVITS.Accelerate.logger import console, logger, SpeedColumnIteration
+from GPT_SoVITS.Accelerate.logger import SpeedColumnIteration, console, logger
 from GPT_SoVITS.text.cleaner import clean_text
 from tools.my_utils import clean_path
@@ -302,16 +301,8 @@ def is_powershell_env(env: dict) -> bool:


 def get_prog_name() -> str:
-    system = platform.system()
-    env = os.environ.copy()
-    script_rel = osp.join("GPT_SoVITS", "prepare_datasets", osp.basename(__file__))
-    if system == "Windows":
-        if is_powershell_env(env):
-            return rf"$env:PYTHONPATH='.'; python -s {script_rel}"
-        else:
-            return rf"set PYTHONPATH=. && python -s {script_rel}"
-    else:
-        return f"PYTHONPATH=. python -s {script_rel}"
+    script_rel = ".".join(["GPT_SoVITS", "prepare_datasets", osp.basename(__file__)]).strip(".py")
+    return f"python -s -m {script_rel}"


 if __name__ == "__main__":
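One caveat worth flagging in the rewritten helper (it recurs in the two sibling scripts below): `str.strip(".py")` removes any run of the characters `.`, `p`, and `y` from both ends of the string, not the literal suffix. It happens to be harmless for these filenames, but a suffix-safe spelling would use `removesuffix` (Python 3.9+). A hypothetical variant, for illustration only:

```python
import os.path as osp


def module_name(path: str) -> str:
    """Hypothetical suffix-safe variant; not what the commit ships."""
    stem = osp.basename(path).removesuffix(".py")  # drops the literal ".py" only
    return ".".join(["GPT_SoVITS", "prepare_datasets", stem])


assert module_name("1-get-text.py") == "GPT_SoVITS.prepare_datasets.1-get-text"
assert "copy.py".strip(".py") == "co"  # the strip() pitfall: 'p' and 'y' over-stripped
```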
@@ -1,7 +1,6 @@
 import enum
 import os
 import os.path as osp
-import platform
 import queue
 import sys
 import time
@@ -405,16 +404,8 @@ def is_powershell_env(env: dict) -> bool:


 def get_prog_name() -> str:
-    system = platform.system()
-    env = os.environ.copy()
-    script_rel = os.path.join("GPT_SoVITS", "prepare_datasets", os.path.basename(__file__))
-    if system == "Windows":
-        if is_powershell_env(env):
-            return rf"$env:PYTHONPATH='.'; python -s {script_rel}"
-        else:
-            return rf"set PYTHONPATH=. && python -s {script_rel}"
-    else:
-        return f"PYTHONPATH=. python -s {script_rel}"
+    script_rel = ".".join(["GPT_SoVITS", "prepare_datasets", osp.basename(__file__)]).strip(".py")
+    return f"python -s -m {script_rel}"


 if __name__ == "__main__":
@@ -2,7 +2,6 @@ import enum
 import gc
 import os
 import os.path as osp
-import platform
 import queue
 import sys
 import time
@@ -12,10 +11,10 @@ from typing import List, Tuple
 import torch
 import torch.multiprocessing as tmp
 import typer
-from rich.progress import BarColumn, Progress, TimeRemainingColumn, TextColumn
+from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
 from torch.multiprocessing.spawn import spawn

-from GPT_SoVITS.Accelerate.logger import console, logger, SpeedColumnIteration
+from GPT_SoVITS.Accelerate.logger import SpeedColumnIteration, console, logger
 from GPT_SoVITS.module.models import SynthesizerTrn, SynthesizerTrnV3
 from GPT_SoVITS.process_ckpt import inspect_version
 from tools.my_utils import DictToAttrRecursive, clean_path
@@ -295,16 +294,8 @@ def is_powershell_env(env: dict) -> bool:


 def get_prog_name() -> str:
-    system = platform.system()
-    env = os.environ.copy()
-    script_rel = osp.join("GPT_SoVITS", "prepare_datasets", osp.basename(__file__))
-    if system == "Windows":
-        if is_powershell_env(env):
-            return rf"$env:PYTHONPATH='.'; python -s {script_rel}"
-        else:
-            return rf"set PYTHONPATH=. && python -s {script_rel}"
-    else:
-        return f"PYTHONPATH=. python -s {script_rel}"
+    script_rel = ".".join(["GPT_SoVITS", "prepare_datasets", osp.basename(__file__)]).strip(".py")
+    return f"python -s -m {script_rel}"


 if __name__ == "__main__":
@@ -273,13 +273,13 @@ Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1`, then open the inference
 #### Others

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 OR

 ```bash
-PYTHONPATH=. python webui.py
+python webui.py
 ```

 then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
@@ -265,7 +265,7 @@ python webui.py <language(optional)>
 #### 其他

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 或者
@@ -251,13 +251,13 @@ python webui.py <言語(オプション)>
 #### その他

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 または

 ```bash
-PYTHONPATH=. python webui.py
+python webui.py
 ```

 その後、`1-GPT-SoVITS-TTS/1C-inference`で推論 webui を開きます.
@@ -259,7 +259,7 @@ python webui.py <언어(옵션)>
 #### 기타

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 또는
@@ -259,7 +259,7 @@ python webui.py <dil(isteğe bağlı)>
 #### Diğerleri

 ```text
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 VEYA
@@ -2,6 +2,5 @@ set "SCRIPT_DIR=%~dp0"
 set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
 cd /d "%SCRIPT_DIR%"
 set "PATH=%SCRIPT_DIR%\runtime"
-set "PYTHONPATH=%SCRIPT_DIR%"
 runtime\python.exe -I webui.py zh_CN
 pause
@@ -3,6 +3,5 @@ chcp 65001
 Set-Location $PSScriptRoot
 $runtimePath = Join-Path $PSScriptRoot "runtime"
 $env:PATH = "$runtimePath"
-$env:PYTHONPATH = "$runtimePath"
 & "$runtimePath\python.exe" -I "$PSScriptRoot\webui.py" zh_CN
 pause
webui.py: 20 lines changed
@@ -44,11 +44,11 @@ from tools.assets import css, js, top_html
 from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import check_details, check_for_existance

-os.environ["PYTHONPATH"] = now_dir = os.getcwd()
 os.environ["version"] = version = "v2Pro"
 os.environ["TORCH_DISTRIBUTED_DEBUG"] = "INFO"
 os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
 os.environ["all_proxy"] = ""
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"


 backends_gradio = [(b.replace("-", " "), b) for b in backends]
@@ -86,7 +86,7 @@ def build_parser() -> argparse.ArgumentParser:

 args = build_parser().parse_args()

-tmp = os.path.join(now_dir, "TEMP")
+tmp = "TEMP"
 os.makedirs(tmp, exist_ok=True)
 os.environ["TEMP"] = tmp
 if os.path.exists(tmp):
@@ -794,11 +794,10 @@ def open1a(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()

     # fmt: off
     cmd = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/1-get-text.py",
+        python_exec, "-s", "-m", "GPT_SoVITS.prepare_datasets.1_get_text",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--bert", bert_pretrained_dir,
@@ -884,11 +883,10 @@ def open1b(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()

     # fmt: off
     cmd = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2-get-hubert-sv-wav32k.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2_get_hubert_sv_wav32k.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--cnhubert", ssl_pretrained_dir,
@@ -977,11 +975,10 @@ def open1c(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()

     # fmt: off
     cmd = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3-get-semantic.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3_get_semantic.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--pretrained-s2g", pretrained_s2G_path,
@@ -1073,12 +1070,11 @@ def open1abc(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()

     # Step 1
     # fmt: off
     cmd_1 = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/1-get-text.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/1_get_text.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--bert", bert_pretrained_dir,
@@ -1124,7 +1120,7 @@ def open1abc(
     # Step 2
     # fmt: off
     cmd_2 = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2-get-hubert-sv-wav32k.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2_get_hubert_sv_wav32k.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--cnhubert", ssl_pretrained_dir,
@@ -1175,7 +1171,7 @@ def open1abc(
     # Step 3
     # fmt: off
     cmd_3 = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3-get-semantic.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3_get_semantic.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--pretrained-s2g", pretrained_s2G_path,
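A closing note on the webui.py hunks: with the dataset scripts renamed to importable underscore form (`1-get-text.py` → `1_get_text.py`, and so on) and launched by module name or by their new paths, the per-command `env["PYTHONPATH"] = os.getcwd()` override becomes unnecessary. A hedged sketch of the resulting launch, with `python_exec`, `inp_text`, and `opt_dir` standing in for the values webui.py assembles:

```python
import os
import subprocess

# Placeholder values mirroring names used in webui.py's open1a()
python_exec = "python"
inp_text = "data/filelist.txt"  # hypothetical
opt_dir = "logs/experiment"     # hypothetical

env = os.environ.copy()  # no PYTHONPATH override needed with -m resolution
cmd = [
    python_exec, "-s", "-m", "GPT_SoVITS.prepare_datasets.1_get_text",
    "--inp-list", inp_text,
    "--opt", opt_dir,
]
subprocess.run(cmd, env=env, check=True)
```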