XXXXRT666 2025-09-08 19:30:35 +08:00
parent 9f89f679c1
commit 26d5eaf1b4
26 changed files with 56 additions and 79 deletions

.gitignore vendored
View File

@@ -18,7 +18,7 @@ speakers.json
 ref_audios
 tools/AP_BWE/24kto48k/*
 !tools/AP_BWE/24kto48k/readme.txt
-onnx
+onnx_export
 # Byte-compiled / optimized / DLL files
 __pycache__/

View File

@@ -11,10 +11,10 @@ import pandas as pd
 import torch
 from torch.utils.data import DataLoader, Dataset
-version = os.environ.get("version", None)
 from GPT_SoVITS.text import cleaned_text_to_sequence
+version = os.environ.get("version", None)
 # from config import exp_dir

View File

@@ -152,7 +152,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)
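Note: the 1800 → 2000 bump recurs in every backend below because `max_seq_length` fixes the capacity of the preallocated KV cache and positional tables at construction time. A minimal sketch of that sizing pattern, with hypothetical head counts (the real cache classes live under `GPT_SoVITS/Accelerate`):

```python
import torch
from torch import nn


class KVCache(nn.Module):
    """Preallocated per-layer cache; capacity is fixed at construction time."""

    def __init__(self, max_batch_size: int = 10, max_seq_length: int = 2000,
                 n_head: int = 16, head_dim: int = 32) -> None:
        super().__init__()
        shape = (max_batch_size, max_seq_length, n_head, head_dim)
        self.register_buffer("k_cache", torch.zeros(shape))
        self.register_buffer("v_cache", torch.zeros(shape))

    def update(self, pos: int, k: torch.Tensor, v: torch.Tensor) -> None:
        # Writing past max_seq_length would fail, hence the decode-time clamp
        # against the same constant in the T2SEngine hunks below.
        self.k_cache[: k.shape[0], pos : pos + k.shape[1]] = k
        self.v_cache[: v.shape[0], pos : pos + v.shape[1]] = v
```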

View File

@@ -87,7 +87,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)

View File

@@ -91,7 +91,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
        max_batch_size: int = 10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)

View File

@@ -75,7 +75,7 @@ class T2SEngine(T2SEngineProtocol):
                 transient=True,
             ) as progress,
         ):
-            max_token = min(1800 - int(session.input_pos.max()), 1500)
+            max_token = min(2000 - int(session.input_pos.max()), 1500)
             task = progress.add_task("T2S Decoding", total=max_token)
             for idx in range(1500):
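The clamp keeps prompt length plus newly generated tokens inside the 2000-slot cache while 1500 stays the hard cap on generation; note this engine still iterates `for idx in range(1500)` and relies on the progress total, whereas the sibling PyTorch engine later in this commit iterates `range(max_token)` directly. A worked example with an assumed 700-token prompt:

```python
# Illustrative numbers only: a 700-token prompt leaves 1300 free cache slots,
# so decoding must stop before the KV cache overflows.
max_seq_length, generation_cap, input_pos = 2000, 1500, 700
max_token = min(max_seq_length - input_pos, generation_cap)
assert max_token == 1300
```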

View File

@@ -43,7 +43,7 @@ class SinePositionalEmbedding(nn.Module):
         embedding_dim: int,
         scale: bool = False,
         max_batch_size: int = 10,
-        max_seq_len: int = 1800,
+        max_seq_len: int = 2000,
     ):
         super().__init__()
         self.embedding_dim = embedding_dim
@@ -278,7 +278,7 @@ class AttentionABC(ABC, nn.Module):
     def prefill(self, x: Array, kv_cache: KVCache | KVCacheQ, attn_mask: Array):
         bsz, seqlen, _ = cast(tuple[int, ...], x.shape)
-        q, k, v = self.in_proj(mx.expand_dims(x, 0)).split(3, axis=-1)
+        q, k, v = self.in_proj(x).split(3, axis=-1)
         q, k, v = map(lambda x: x.reshape(bsz, seqlen, self.n_head, self.head_dim), (q, k, v))
@@ -413,7 +413,7 @@ class T2SDecoderABC(nn.Module, T2SDecoderProtocol):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__()
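The attention hunk in this MLX file (and its PyTorch twin later in the commit) drops the extra leading batch axis (`mx.expand_dims(x, 0)` / `x.unsqueeze(0)`): `x` already arrives at `prefill` as `[bsz, seqlen, dim]`, so the projection can be applied directly. A minimal shape check in PyTorch, with made-up dimensions:

```python
import torch
from torch import nn

bsz, seqlen, dim, n_head = 2, 7, 512, 16
head_dim = dim // n_head
in_proj = nn.Linear(dim, 3 * dim)

x = torch.randn(bsz, seqlen, dim)  # already batched: no unsqueeze(0)/expand_dims needed
q, k, v = in_proj(x).chunk(3, dim=-1)
q, k, v = (t.contiguous().view(bsz, seqlen, n_head, head_dim) for t in (q, k, v))
assert q.shape == (bsz, seqlen, n_head, head_dim)
```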

View File

@@ -100,7 +100,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         assert torch.cuda.is_available()

View File

@@ -78,7 +78,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)

View File

@@ -94,7 +94,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)

View File

@@ -78,7 +78,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)

View File

@@ -86,7 +86,7 @@ class T2SDecoder(T2SDecoderABC):
     def __init__(
         self,
         config,
-        max_seq_length=1800,
+        max_seq_length=2000,
         max_batch_size=10,
     ) -> None:
         super().__init__(config, max_seq_length, max_batch_size)

View File

@@ -57,7 +57,7 @@ class T2SEngine(T2SEngineProtocol):
                 transient=True,
             ) as progress,
         ):
-            max_token = int(min(1800 - session.input_pos.max(), 1500))
+            max_token = int(min(2000 - session.input_pos.max(), 1500))
             task = progress.add_task("T2S Decoding", total=max_token)
             for idx in range(max_token):

View File

@@ -55,7 +55,7 @@ class SinePositionalEmbedding(nn.Module):
         scale: bool = False,
         alpha: bool = False,
         max_batch_size: int = 10,
-        max_seq_len: int = 1800,
+        max_seq_len: int = 2000,
     ):
         super().__init__()
         self.embedding_dim = embedding_dim
@@ -106,8 +106,9 @@ class SinePositionalEmbedding(nn.Module):
             embedded_x (Tensor): [batch_size, seq_len, embed_dim]
         """
-        pe_values = self.pe[:, : x.shape[-2]]
-        return x * self.x_scale + self.alpha.item() * pe_values
+        batch_size = x.shape[0]
+        pe_values = self.pe[:batch_size, : x.shape[-2]]
+        return x * self.x_scale + self.alpha * pe_values


 class KVCacheABC(nn.Module, ABC, KVCacheProtocol):
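The second hunk above fixes two things at once: the positional table is now sliced to the actual batch size, and `self.alpha` participates as a tensor instead of via `.item()`. The latter matters whenever autograd is involved, since `.item()` yields a detached Python float and silently cuts the gradient path to the learnable scale. A standalone illustration:

```python
import torch

alpha = torch.nn.Parameter(torch.ones(1))
pe = torch.randn(1, 4, 8)  # toy positional table
x = torch.randn(1, 4, 8)

detached = x + alpha.item() * pe  # plain float: alpha receives no gradient
attached = x + alpha * pe         # keeps alpha in the autograd graph

attached.sum().backward()
assert alpha.grad is not None
```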
@@ -290,7 +291,7 @@ class AttentionABC(nn.Module, ABC):
     def prefill(self, x: Tensor, kv_cache: KVCacheProtocol, attn_mask: Tensor) -> Tensor:
         bsz, seqlen, _ = x.shape
-        q, k, v = self.in_proj(x.unsqueeze(0)).chunk(3, dim=-1)
+        q, k, v = self.in_proj(x).chunk(3, dim=-1)
         q, k, v = map(lambda x: x.contiguous().view(bsz, seqlen, self.n_head, self.head_dim), (q, k, v))
@@ -416,7 +417,7 @@ class T2SDecoderABC(nn.Module, ABC, T2SDecoderProtocol):
     def __init__(
         self,
         config: dict,
-        max_seq_length: int = 1800,
+        max_seq_length: int = 2000,
         max_batch_size: int = 10,
     ) -> None:
         super().__init__()

View File

@@ -60,6 +60,7 @@ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
 logging.getLogger("multipart.multipart").setLevel(logging.ERROR)

 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"


 def set_high_priority():
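`PYTORCH_ENABLE_MPS_FALLBACK=1` is PyTorch's documented escape hatch for Apple-silicon backends: operators not yet implemented for MPS fall back to the CPU instead of raising `NotImplementedError`. The usual advice is to set it before `torch` is imported so the flag is visible when the backend initializes, which matches its placement at module top here (and in `webui.py` later in this commit):

```python
import os

# Set before importing torch so the MPS backend sees the flag at init time.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch  # noqa: E402  (deliberately late, mirroring the ordering above)

print(torch.backends.mps.is_available())
```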
@@ -90,7 +91,7 @@ def lang_type(text: str) -> str:
 def build_parser() -> argparse.ArgumentParser:
     p = argparse.ArgumentParser(
         prog="inference_webui",
-        description=f"PYTHONPATH=. python -s GPT_SoVITS/inference_webui.py zh_CN -b {backends[-1]}",
+        description=f"python -s -m GPT_SoVITS.inference_webui zh_CN -b {backends[-1]}",
     )
     p.add_argument(
         "language",
@@ -691,6 +692,8 @@ def get_tts_wav(
     pause_second=0.3,
 ):
     torch.set_grad_enabled(False)
+    ttfb_time = ttime()
+
     if ref_wav_path:
         pass
     else:
@@ -918,6 +921,8 @@ def get_tts_wav(
         with torch.inference_mode():
             wav_gen = vocoder_model(cfm_res)  # type: ignore
             audio = wav_gen[0][0]
+        if i_text == 0:
+            ttfb_time = ttime() - ttfb_time
         max_audio = torch.abs(audio).max()  # simple guard against 16-bit clipping
         if max_audio > 1:
             audio = audio / max_audio
@@ -954,6 +959,10 @@ def get_tts_wav(
     console.print(f">> Time Stamps: {t0:.3f}\t{t1:.3f}\t{t2:.3f}\t{t3:.3f}")
     console.print(f">> Infer Speed: {infer_speed_avg:.2f} Token/s")
     console.print(f">> RTF: {rtf_value:.2f}")
+    if ttfb_time > 2:
+        console.print(f">> TTFB: {ttfb_time:.3f} s")
+    else:
+        console.print(f">> TTFB: {ttfb_time * 1000:.3f} ms")
     gr.Info(f"{infer_speed_avg:.2f} Token/s", title="Infer Speed")
     gr.Info(f"{rtf_value:.2f}", title="RTF")
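The `ttfb_time` plumbing added above measures time-to-first-audio: the clock starts on entry to `get_tts_wav` and stops once the first text segment (`i_text == 0`) has produced vocoder output; the report switches from milliseconds to seconds above a 2 s threshold. The same pattern in isolation, with a hypothetical `segments` iterable of synthesis callables:

```python
from time import time as ttime


def report_ttfb(segments) -> None:
    """Print first-chunk latency in the same format as the webui."""
    ttfb_time = ttime()
    for i_text, synthesize in enumerate(segments):
        synthesize()  # hypothetical callable producing one audio chunk
        if i_text == 0:
            ttfb_time = ttime() - ttfb_time
    if ttfb_time > 2:
        print(f">> TTFB: {ttfb_time:.3f} s")
    else:
        print(f">> TTFB: {ttfb_time * 1000:.3f} ms")
```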

View File

@@ -1,7 +1,6 @@
 import enum
 import os
 import os.path as osp
-import platform
 import queue
 import sys
 import time
@@ -16,7 +15,7 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
 from torch.multiprocessing.spawn import spawn
 from transformers import BertForMaskedLM, BertTokenizerFast

-from GPT_SoVITS.Accelerate.logger import console, logger, SpeedColumnIteration
+from GPT_SoVITS.Accelerate.logger import SpeedColumnIteration, console, logger
 from GPT_SoVITS.text.cleaner import clean_text
 from tools.my_utils import clean_path
@@ -302,16 +301,8 @@ def is_powershell_env(env: dict) -> bool:


 def get_prog_name() -> str:
-    system = platform.system()
-    env = os.environ.copy()
-    script_rel = osp.join("GPT_SoVITS", "prepare_datasets", osp.basename(__file__))
-    if system == "Windows":
-        if is_powershell_env(env):
-            return rf"$env:PYTHONPATH='.'; python -s {script_rel}"
-        else:
-            return rf"set PYTHONPATH=. && python -s {script_rel}"
-    else:
-        return f"PYTHONPATH=. python -s {script_rel}"
+    script_rel = ".".join(["GPT_SoVITS", "prepare_datasets", osp.basename(__file__)]).strip(".py")
+    return f"python -s -m {script_rel}"


 if __name__ == "__main__":
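One caveat on the new one-liner (which recurs in the other two dataset-prep scripts below): `str.strip(".py")` removes a *set of characters* ('.', 'p', 'y') from both ends, not the literal suffix. It happens to be safe for the scripts renamed in this commit, but a module name beginning or ending in 'p' or 'y' would be mangled. A suffix-safe sketch using `os.path.splitext`:

```python
import os.path as osp


def module_name(filename: str) -> str:
    # splitext drops exactly one extension; it never eats stray 'p'/'y' characters
    stem = osp.splitext(osp.basename(filename))[0]
    return ".".join(["GPT_SoVITS", "prepare_datasets", stem])


assert module_name("1_get_text.py") == "GPT_SoVITS.prepare_datasets.1_get_text"
```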

View File

@@ -1,7 +1,6 @@
 import enum
 import os
 import os.path as osp
-import platform
 import queue
 import sys
 import time
@@ -405,16 +404,8 @@ def is_powershell_env(env: dict) -> bool:


 def get_prog_name() -> str:
-    system = platform.system()
-    env = os.environ.copy()
-    script_rel = os.path.join("GPT_SoVITS", "prepare_datasets", os.path.basename(__file__))
-    if system == "Windows":
-        if is_powershell_env(env):
-            return rf"$env:PYTHONPATH='.'; python -s {script_rel}"
-        else:
-            return rf"set PYTHONPATH=. && python -s {script_rel}"
-    else:
-        return f"PYTHONPATH=. python -s {script_rel}"
+    script_rel = ".".join(["GPT_SoVITS", "prepare_datasets", osp.basename(__file__)]).strip(".py")
+    return f"python -s -m {script_rel}"


 if __name__ == "__main__":

View File

@@ -2,7 +2,6 @@ import enum
 import gc
 import os
 import os.path as osp
-import platform
 import queue
 import sys
 import time
@@ -12,10 +11,10 @@ from typing import List, Tuple
 import torch
 import torch.multiprocessing as tmp
 import typer
-from rich.progress import BarColumn, Progress, TimeRemainingColumn, TextColumn
+from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
 from torch.multiprocessing.spawn import spawn

-from GPT_SoVITS.Accelerate.logger import console, logger, SpeedColumnIteration
+from GPT_SoVITS.Accelerate.logger import SpeedColumnIteration, console, logger
 from GPT_SoVITS.module.models import SynthesizerTrn, SynthesizerTrnV3
 from GPT_SoVITS.process_ckpt import inspect_version
 from tools.my_utils import DictToAttrRecursive, clean_path
@@ -295,16 +294,8 @@ def is_powershell_env(env: dict) -> bool:


 def get_prog_name() -> str:
-    system = platform.system()
-    env = os.environ.copy()
-    script_rel = osp.join("GPT_SoVITS", "prepare_datasets", osp.basename(__file__))
-    if system == "Windows":
-        if is_powershell_env(env):
-            return rf"$env:PYTHONPATH='.'; python -s {script_rel}"
-        else:
-            return rf"set PYTHONPATH=. && python -s {script_rel}"
-    else:
-        return f"PYTHONPATH=. python -s {script_rel}"
+    script_rel = ".".join(["GPT_SoVITS", "prepare_datasets", osp.basename(__file__)]).strip(".py")
+    return f"python -s -m {script_rel}"


 if __name__ == "__main__":

View File

@@ -273,13 +273,13 @@ Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1`, then open the inference
 #### Others

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 OR

 ```bash
-PYTHONPATH=. python webui.py
+python webui.py
 ```

 then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
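Dropping `PYTHONPATH=.` works because `python -m pkg.module` resolves the package relative to the current working directory, which the interpreter prepends to `sys.path`; `python pkg/module.py` instead prepends the script's own directory, which is why absolute imports such as `from GPT_SoVITS.text import ...` previously needed the environment variable. A quick probe (hypothetical `show_path.py`, not part of the repository) makes the difference visible:

```python
# Save as GPT_SoVITS/show_path.py and run both ways from the repo root:
#   python GPT_SoVITS/show_path.py   -> prints .../GPT_SoVITS (the script's directory)
#   python -m GPT_SoVITS.show_path   -> prints the repo root (current directory)
import sys

print(sys.path[0])
```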

View File

@@ -265,7 +265,7 @@ python webui.py <language(optional)>
 #### Others

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 Or

View File

@@ -251,13 +251,13 @@ python webui.py <language(optional)>
 #### Others

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 Or

 ```bash
-PYTHONPATH=. python webui.py
+python webui.py
 ```

 Then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`.

View File

@@ -259,7 +259,7 @@ python webui.py <language(optional)>
 #### Others

 ```bash
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 Or

View File

@@ -259,7 +259,7 @@ python webui.py <language(optional)>
 #### Others

 ```text
-PYTHONPATH=. python GPT_SoVITS/inference_webui.py <language(optional)> -b <backend> -p <port>
+python -m GPT_SoVITS.inference_webui <language(optional)> -b <backend> -p <port>
 ```

 OR

View File

@@ -2,6 +2,5 @@ set "SCRIPT_DIR=%~dp0"
 set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
 cd /d "%SCRIPT_DIR%"
 set "PATH=%SCRIPT_DIR%\runtime"
-set "PYTHONPATH=%SCRIPT_DIR%"
 runtime\python.exe -I webui.py zh_CN
 pause

View File

@@ -3,6 +3,5 @@ chcp 65001
 Set-Location $PSScriptRoot
 $runtimePath = Join-Path $PSScriptRoot "runtime"
 $env:PATH = "$runtimePath"
-$env:PYTHONPATH = "$runtimePath"
 & "$runtimePath\python.exe" -I "$PSScriptRoot\webui.py" zh_CN
 pause

View File

@@ -44,11 +44,11 @@ from tools.assets import css, js, top_html
 from tools.i18n.i18n import I18nAuto, scan_language_list
 from tools.my_utils import check_details, check_for_existance

-os.environ["PYTHONPATH"] = now_dir = os.getcwd()
 os.environ["version"] = version = "v2Pro"
 os.environ["TORCH_DISTRIBUTED_DEBUG"] = "INFO"
 os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
 os.environ["all_proxy"] = ""
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

 backends_gradio = [(b.replace("-", " "), b) for b in backends]
@@ -86,7 +86,7 @@ def build_parser() -> argparse.ArgumentParser:
 args = build_parser().parse_args()

-tmp = os.path.join(now_dir, "TEMP")
+tmp = "TEMP"
 os.makedirs(tmp, exist_ok=True)
 os.environ["TEMP"] = tmp
 if os.path.exists(tmp):
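The hunks below drop `env["PYTHONPATH"] = os.getcwd()` from each dataset-prep launcher: with the scripts renamed from hyphens to underscores (and step 1 launched via `-m`), the variable becomes unnecessary so long as each subprocess keeps the repo root as its working directory. A hedged sketch of the resulting invocation pattern, with hypothetical argument values:

```python
import os
import subprocess
import sys

cmd = [
    sys.executable, "-s", "-m", "GPT_SoVITS.prepare_datasets.1_get_text",
    "--inp-list", "data/filelist.txt",  # hypothetical paths
    "--opt", "logs/exp1",
]
# No PYTHONPATH needed: `-m` resolves GPT_SoVITS from cwd, so keep cwd at the repo root.
subprocess.run(cmd, env=os.environ.copy(), cwd=os.getcwd(), check=True)
```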
@@ -794,11 +794,10 @@ def open1a(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()
     # fmt: off
     cmd = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/1-get-text.py",
+        python_exec, "-s", "-m", "GPT_SoVITS.prepare_datasets.1_get_text",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--bert", bert_pretrained_dir,
@@ -884,11 +883,10 @@ def open1b(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()
     # fmt: off
     cmd = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2-get-hubert-sv-wav32k.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2_get_hubert_sv_wav32k.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--cnhubert", ssl_pretrained_dir,
@@ -977,11 +975,10 @@ def open1c(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()
     # fmt: off
     cmd = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3-get-semantic.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3_get_semantic.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--pretrained-s2g", pretrained_s2G_path,
@@ -1073,12 +1070,11 @@ def open1abc(
     opt_dir = f"{exp_root}/{exp_name}"

     env = os.environ.copy()
-    env["PYTHONPATH"] = os.getcwd()

     # Step 1
     # fmt: off
     cmd_1 = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/1-get-text.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/1_get_text.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--bert", bert_pretrained_dir,
@@ -1124,7 +1120,7 @@ def open1abc(
     # Step 2
     # fmt: off
     cmd_2 = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2-get-hubert-sv-wav32k.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/2_get_hubert_sv_wav32k.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--cnhubert", ssl_pretrained_dir,
@@ -1175,7 +1171,7 @@ def open1abc(
     # Step 3
     # fmt: off
     cmd_3 = [
-        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3-get-semantic.py",
+        python_exec, "-s", "GPT_SoVITS/prepare_datasets/3_get_semantic.py",
         "--inp-list", inp_text,
         "--opt", opt_dir,
         "--pretrained-s2g", pretrained_s2G_path,