Merge 50a88a596dea718c83e535136e9cb46b513cef6f into 2d9193b0d3c0eae0c3a14d8c68a839f1bae157dc

2026-06-06 22:48:17 +08:00 · 2026-02-10 15:39:51 +08:00
22 changed files with 1496 additions and 3277 deletions
--- a/GPT_SoVITS/AR/data/dataset.py
+++ b/GPT_SoVITS/AR/data/dataset.py
@ -67,10 +67,8 @@ class Text2SemanticDataset(Dataset):
            )
        )  # "%s/3-bert"%exp_dir#bert_dir
        self.path6 = semantic_path  # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
-        if not os.path.exists(self.path2):
-            raise FileNotFoundError(f"Phoneme data file not found: {self.path2}")
-        if not os.path.exists(self.path6):
-            raise FileNotFoundError(f"Semantic data file not found: {self.path6}")
+        assert os.path.exists(self.path2)
+        assert os.path.exists(self.path6)
        self.phoneme_data = {}
        with open(self.path2, "r", encoding="utf8") as f:
            lines = f.read().strip("\n").split("\n")
@ -133,7 +131,7 @@ class Text2SemanticDataset(Dataset):
                phoneme, word2ph, text = self.phoneme_data[item_name]
            except Exception:
                traceback.print_exc()
-                print(f"Warning: File \"{item_name}\" not in self.phoneme_data! Skipped. ")
+                # print(f"{item_name} not in self.phoneme_data !")
                num_not_in += 1
                continue

@ -154,7 +152,7 @@ class Text2SemanticDataset(Dataset):
                phoneme_ids = cleaned_text_to_sequence(phoneme, version)
            except:
                traceback.print_exc()
-                print(f"Warning: Failed to convert phonemes to sequence for file \"{item_name}\"! Skipped. ")
+                # print(f"{item_name} not in self.phoneme_data !")
                num_not_in += 1
                continue
            # if len(phoneme_ids) >400:###########2：改为恒定限制为semantic/2.5就行
@ -230,11 +228,7 @@ class Text2SemanticDataset(Dataset):
            # bert_feature=torch.zeros_like(phoneme_ids,dtype=torch.float32)
            bert_feature = None
        else:
-            try:
-                assert bert_feature.shape[-1] == len(phoneme_ids)
-            except AssertionError:
-                print(f"AssertionError: The BERT feature dimension ({bert_feature.shape[-1]}) of the file '{item_name}' does not match the length of the phoneme sequence ({len(phoneme_ids)}).")
-                raise
+            assert bert_feature.shape[-1] == len(phoneme_ids)
        return {
            "idx": idx,
            "phoneme_ids": phoneme_ids,
--- a/GPT_SoVITS/AR/models/embedding_cudagraph.py
+++ b/GPT_SoVITS/AR/models/embedding_cudagraph.py
@ -1,76 +0,0 @@
-import math
-
-import torch
-from torch import nn
-
-
-class TokenEmbedding(nn.Module):
-    def __init__(self, embedding_dim: int, vocab_size: int, dropout: float = 0.0):
-        super().__init__()
-        self.vocab_size = vocab_size
-        self.embedding_dim = embedding_dim
-        self.dropout = nn.Dropout(p=dropout)
-        self.word_embeddings = nn.Embedding(self.vocab_size, self.embedding_dim)
-
-    @property
-    def weight(self) -> torch.Tensor:
-        return self.word_embeddings.weight
-
-    def embedding(self, index: int) -> torch.Tensor:
-        return self.word_embeddings.weight[index : index + 1]
-
-    def forward(self, x: torch.Tensor):
-        x = self.word_embeddings(x)
-        x = self.dropout(x)
-        return x
-
-
-class SinePositionalEmbeddingNested(nn.Module):
-    def __init__(
-        self,
-        embedding_dim: int,
-        dropout: float = 0.0,
-        scale: bool = False,
-        alpha: bool = False,
-        max_batch_size: int = 20,
-        max_seq_len: int = 2500,
-    ):
-        super().__init__()
-        self.embedding_dim = embedding_dim
-        self.x_scale = math.sqrt(embedding_dim) if scale else 1.0
-        self.alpha = nn.Parameter(torch.ones(1), requires_grad=alpha)
-        self.dropout = nn.Dropout(p=dropout)
-        self.max_batch_size = max_batch_size
-        self.max_seq_len = max_seq_len
-
-        self.reverse = False
-        self.register_buffer(
-            "pe", torch.zeros(max_batch_size, max_seq_len, embedding_dim), persistent=False
-        )
-        self.pe: torch.Tensor
-        self.compute_pe()
-
-    def compute_pe(self):
-        if self.reverse:
-            position = torch.arange(self.max_seq_len - 1, -1, -1.0, dtype=torch.float32).unsqueeze(1)
-        else:
-            position = torch.arange(self.max_seq_len, dtype=torch.float32).unsqueeze(1)
-        div_term = torch.exp(
-            torch.arange(0, self.embedding_dim, 2, dtype=torch.float32)
-            * -(math.log(10000.0) / self.embedding_dim)
-        )
-        pe = self.pe
-        pe[:, :, 0::2] = torch.sin(position * div_term)
-        pe[:, :, 1::2] = torch.cos(position * div_term)
-
-    def forward(self, input_pos: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
-        batch_size = x.shape[0]
-        pe_values = self.pe[torch.arange(batch_size), input_pos - 1]
-        return x * self.x_scale + self.alpha * pe_values.unsqueeze(1)
-
-    def prefill(self, x: torch.Tensor) -> torch.Tensor:
-        input_pos = torch.tensor([i.shape[0] for i in x.unbind()])
-        pe_values = torch.nested.nested_tensor(
-            [self.pe[i, : input_pos[i], :] for i in range(input_pos.size(0))]
-        )
-        return x * self.x_scale + self.alpha.item() * pe_values
--- a/GPT_SoVITS/AR/models/structs_cudagraph.py
+++ b/GPT_SoVITS/AR/models/structs_cudagraph.py
@ -1,78 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import List, Literal, Optional
-
-import torch
-
-Tensor = torch.Tensor
-
-
-@dataclass
-class T2SResult:
-    result: List[Tensor] | None = None
-    infer_speed: float = 0.0
-    status: Literal["Success", "Error"] = "Success"
-    exception: Optional[Exception] = None
-    traceback: Optional[str] = None
-
-
-@dataclass
-class T2SRequest:
-    x: List[torch.Tensor]
-    x_lens: Tensor
-    prompts: torch.Tensor
-    bert_feature: List[Tensor]
-    valid_length: int
-    top_k: int = 5
-    top_p: float = 1
-    early_stop_num: int = -1
-    temperature: float = 1.0
-    repetition_penalty: float = 1.35
-    use_cuda_graph: bool = False
-    debug: bool = False
-
-
-class T2SSession:
-    def __init__(self, decoder, request: T2SRequest, device: torch.device, dtype: torch.dtype):
-        with device:
-            self.decoder = decoder
-            self.request = request
-            self.device = device
-            self.dtype = dtype
-
-            bsz = len(request.x)
-            y_len = request.prompts.size(-1)
-            self.bsz = bsz
-            self.y_len = y_len
-
-            from AR.models.t2s_model_cudagraph import Sampler
-
-            self.sampler = Sampler(bsz, decoder.vocab_size)
-
-            self.x = request.x
-            self.x_lens = request.x_lens.to(torch.int32)
-            self.y = request.prompts
-            self.bert_feature = request.bert_feature
-
-            self.prefill_len = self.x_lens + self.y.size(1)
-
-            self.input_pos = torch.zeros_like(self.prefill_len)
-            self.input_pos.add_(self.prefill_len)
-
-            self.completed = torch.Tensor([False] * len(self.x)).bool().to(device)
-            self.y_results: List[Tensor] = [None] * len(self.x)  # type: ignore
-
-            self.xy_pos = decoder.embed(self.x, self.y, self.bert_feature)
-
-            attn_mask = []
-            for bs in range(bsz):
-                pos = int(self.x_lens[bs].item())
-                mask = torch.zeros(pos + y_len, pos + y_len).bool()
-                mask[:, :pos].fill_(True)
-                if y_len > 0:
-                    mask[-y_len:, -y_len:] = ~torch.triu(
-                        torch.ones(y_len, y_len, dtype=torch.bool), diagonal=1
-                    )
-                attn_mask.append(mask)
-            self.attn_mask_nested = torch.nested.nested_tensor(attn_mask)
--- a/GPT_SoVITS/AR/models/t2s_model_cudagraph.py
+++ b/GPT_SoVITS/AR/models/t2s_model_cudagraph.py
@ -1,602 +0,0 @@
-"""
-CUDA Graph accelerated T2S decoder.
-Uses PyTorch native scaled_dot_product_attention (no flash_attn dependency).
-Adapted from gsvpp/AR/models/t2s_model_abc.py and t2s_model_flash_attn.py.
-"""
-
-from __future__ import annotations
-
-import os
-import time
-import traceback
-from typing import Dict, List, MutableSequence, Optional, Tuple
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.cuda.graphs import CUDAGraph
-from tqdm import tqdm
-
-from AR.models.embedding_cudagraph import (
-    SinePositionalEmbeddingNested as SinePositionalEmbedding,
-)
-from AR.models.embedding_cudagraph import TokenEmbedding
-from AR.models.structs_cudagraph import T2SRequest, T2SResult, T2SSession
-
-Tensor = torch.Tensor
-
-
-class Sampler(nn.Module):
-    def __init__(self, batch_size: int, vocab_size: int) -> None:
-        super().__init__()
-        self.batch_size = batch_size
-
-    def sample(
-        self,
-        logits: Tensor,
-        previous_tokens: Tensor,
-        temperature: float,
-        top_k: int,
-        top_p: float,
-        repetition_penalty: float,
-    ) -> Tensor:
-        previous_tokens = previous_tokens.long()
-        score = torch.gather(logits, dim=1, index=previous_tokens)
-        score = torch.where(
-            score < 0, score * repetition_penalty, score / repetition_penalty
-        )
-        logits.scatter_(dim=1, index=previous_tokens, src=score)
-
-        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
-        cum_probs = torch.cumsum(
-            torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1
-        )
-        sorted_indices_to_remove = cum_probs > top_p
-        sorted_indices_to_remove[:, 0] = False
-        indices_to_remove = sorted_indices_to_remove.scatter(
-            dim=1, index=sorted_indices, src=sorted_indices_to_remove
-        )
-        logits = logits.masked_fill(indices_to_remove, -float("Inf"))
-
-        logits = logits / max(temperature, 1e-5)
-
-        v, _ = torch.topk(logits, top_k)
-        pivot = v[:, -1].unsqueeze(-1)
-        logits = torch.where(logits < pivot, -float("Inf"), logits)
-
-        probs = torch.nn.functional.softmax(logits, dim=-1)
-        q = torch.empty_like(probs).exponential_(1.0)
-        idx_next = torch.argmax(probs / q, dim=-1, keepdim=True).to(dtype=torch.int32)
-
-        return idx_next
-
-
-# ─── KV Cache ──────────────────────────────────────────────────────────────────
-
-
-class KVCacheNHD(nn.Module):
-    def __init__(self, batch_size, max_seq_length, n_heads, head_dim):
-        super().__init__()
-        assert batch_size > 0
-        cache_shape = (batch_size, max_seq_length, n_heads, head_dim)
-        self.n_head = n_heads
-        self.head_dim = head_dim
-        self.batch_size = batch_size
-        self.max_seq_length = max_seq_length
-        self.register_buffer(
-            "k_cache", torch.zeros(size=cache_shape), persistent=False
-        )
-        self.register_buffer(
-            "v_cache", torch.zeros(size=cache_shape), persistent=False
-        )
-
-    def update(self, input_pos: Tensor, k_val: Tensor, v_val: Tensor):
-        index = (
-            (input_pos - 1)
-            .unsqueeze(-1)
-            .unsqueeze(-1)
-            .unsqueeze(-1)
-            .expand(-1, -1, self.n_head, self.head_dim)
-            .to(torch.int64)
-        )
-        k_out = self.k_cache
-        v_out = self.v_cache
-        k_out.scatter_(1, index, k_val)
-        v_out.scatter_(1, index, v_val)
-        return k_out, v_out
-
-    def empty(self):
-        self.k_cache.zero_()
-        self.v_cache.zero_()
-
-    def prefill_kv(self, k_val: Tensor, v_val: Tensor, bs: int):
-        self.k_cache[[bs], : k_val.shape[1]] = k_val
-        self.v_cache[[bs], : v_val.shape[1]] = v_val
-
-
-# ─── Attention (PyTorch native SDPA, no flash_attn) ─────────────────────────
-
-
-class Attention(nn.Module):
-    def __init__(self, n_head: int, hidden_dim: int):
-        super().__init__()
-        self.n_head = n_head
-        self.hidden_dim = hidden_dim
-        assert hidden_dim % n_head == 0
-        self.head_dim = hidden_dim // n_head
-        self.in_proj = nn.Linear(hidden_dim, hidden_dim * 3, bias=True)
-        self.out_proj = nn.Linear(hidden_dim, hidden_dim, bias=True)
-        self.dropout = nn.Dropout(0.1)
-
-        self._register_load_state_dict_pre_hook(self.load_hook)
-
-    def load_hook(self, state_dict: dict, prefix, *args):
-        keys_to_modify = [key for key in state_dict if "in_proj_" in key]
-        for key in keys_to_modify:
-            new_key = key.replace("in_proj_", "in_proj.")
-            state_dict[new_key] = state_dict.pop(key)
-
-    def forward(
-        self, x: Tensor, input_pos: Tensor, kv_cache: KVCacheNHD
-    ) -> Tensor:
-        bsz, seqlen, _ = x.shape
-
-        q, k, v = self.in_proj.forward(x).chunk(3, dim=-1)
-
-        q = q.view(bsz, seqlen, self.n_head, self.head_dim)
-        k = k.view(bsz, seqlen, self.n_head, self.head_dim)
-        v = v.view(bsz, seqlen, self.n_head, self.head_dim)
-
-        k_cache, v_cache = kv_cache.update(input_pos, k, v)
-
-        q = q.transpose(1, 2)  # [B, H, 1, D]
-        k_out = k_cache.transpose(1, 2)  # [B, H, max_seq, D]
-        v_out = v_cache.transpose(1, 2)  # [B, H, max_seq, D]
-
-        attn = F.scaled_dot_product_attention(q, k_out, v_out)
-
-        attn = self.dropout.forward(attn)
-        attn = attn.transpose(1, 2).reshape(bsz, seqlen, self.hidden_dim)
-        attn = self.out_proj.forward(attn)
-        return attn
-
-    def prefill(self, x: Tensor, mask: Tensor, kv_cache: KVCacheNHD) -> Tensor:
-        bsz = x.size(0)
-        outputs = []
-        for bs in range(bsz):
-            x_b = x[bs].unsqueeze(0)
-            q, k, v = self.in_proj.forward(x_b.unsqueeze(0)).chunk(3, dim=-1)
-            q = q.contiguous().view(1, -1, self.n_head, self.head_dim)
-            k = k.contiguous().view(1, -1, self.n_head, self.head_dim)
-            v = v.contiguous().view(1, -1, self.n_head, self.head_dim)
-            kv_cache.prefill_kv(k, v, bs)
-            q, k, v = map(lambda t: t.transpose(1, 2), (q, k, v))
-            attn_mask = (
-                mask[bs].unsqueeze(0).unsqueeze(0).expand(1, self.n_head, -1, -1)
-            )
-            attn = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
-            attn = self.dropout.forward(attn)
-            attn = attn.transpose(1, 2).contiguous().view(1, -1, self.hidden_dim)
-            output = self.out_proj.forward(attn)
-            outputs.append(output.squeeze(0))
-        return torch.nested.nested_tensor(outputs)
-
-
-# ─── Feed Forward ────────────────────────────────────────────────────────────
-
-
-class FeedForward(nn.Module):
-    def __init__(self, dim: int, hidden_dim: int) -> None:
-        super().__init__()
-        self.linear1 = nn.Linear(dim, hidden_dim, bias=True)
-        self.linear2 = nn.Linear(hidden_dim, dim, bias=True)
-        self.dropout = nn.Dropout(0.1)
-
-    def forward(self, x: Tensor) -> Tensor:
-        return self.dropout.forward(
-            self.linear2(self.dropout.forward(F.relu(self.linear1(x))))
-        )
-
-
-# ─── Transformer Block ──────────────────────────────────────────────────────
-
-
-class TransformerBlock(nn.Module):
-    def __init__(self, n_head, ffn_dim, hidden_dim) -> None:
-        super().__init__()
-        self.hidden_dim = hidden_dim
-        self.attention = Attention(n_head, hidden_dim)
-        self.feed_forward = FeedForward(hidden_dim, ffn_dim)
-        self.attention_norm = nn.LayerNorm([hidden_dim])
-        self.ffn_norm = nn.LayerNorm([hidden_dim])
-        self.dropout = nn.Dropout(0.1)
-
-        self._register_load_state_dict_pre_hook(self.load_hook)
-
-    def load_hook(self, state_dict: dict[str, Tensor], prefix, *args):
-        for key in list(state_dict.keys()):
-            new_key = (
-                key.replace("self_attn", "attention")
-                .replace("linear", "feed_forward.linear")
-                .replace("norm1", "attention_norm")
-                .replace("norm2", "ffn_norm")
-            )
-            state_dict[new_key] = state_dict.pop(key)
-
-    def forward(
-        self, x: Tensor, input_pos: Tensor, kv_cache: KVCacheNHD
-    ) -> Tensor:
-        h = self.attention_norm.forward(
-            x + self.dropout.forward(self.attention.forward(x, input_pos, kv_cache))
-        )
-        out = self.ffn_norm.forward(h + self.feed_forward.forward(h))
-        return out
-
-    def prefill(self, x: Tensor, mask: Tensor, kv_cache: KVCacheNHD) -> Tensor:
-        h = self.attention_norm.forward(
-            x + self.dropout.forward(self.attention.prefill(x, mask, kv_cache))
-        )
-        out = self.ffn_norm.forward(h + self.feed_forward.forward(h))
-        return out
-
-
-# ─── Transformer Decoder ────────────────────────────────────────────────────
-
-
-class TransformerDecoder(nn.Module):
-    def __init__(
-        self,
-        hidden_dim,
-        n_layer,
-        n_head,
-        ffn_dim,
-        vocab_size,
-        max_seq_length,
-        max_batch_size,
-    ) -> None:
-        super().__init__()
-        self.hidden_dim = hidden_dim
-        self.n_head = n_head
-        assert hidden_dim % n_head == 0
-        self.head_dim = hidden_dim // n_head
-        self.vocab_size = vocab_size
-        self.n_layer = n_layer
-        self.layers = nn.ModuleList(
-            TransformerBlock(n_head, ffn_dim, hidden_dim) for _ in range(n_layer)
-        )
-        self.max_seq_length: int = max_seq_length
-        self.max_batch_size: int = max_batch_size
-
-    def forward(
-        self,
-        input_pos: Tensor,
-        x: Tensor,
-        kv_caches: MutableSequence[KVCacheNHD],
-    ):
-        for layer, kv_cache in zip(self.layers, kv_caches):
-            x = layer.forward(x, input_pos, kv_cache)
-        return x
-
-    def prefill(
-        self,
-        x: Tensor,
-        mask: Tensor,
-        kv_caches: MutableSequence[KVCacheNHD],
-    ):
-        for layer, kv_cache in zip(self.layers, kv_caches):
-            x = layer.prefill(x, mask, kv_cache)
-        return x
-
-
-# ─── T2S Decoder ─────────────────────────────────────────────────────────────
-
-
-class T2SDecoder(nn.Module):
-    def __init__(
-        self,
-        config,
-        *args,
-        norm_first=False,
-        max_seq_length=2500,
-        max_batch_size=10,
-        **kwds,
-    ) -> None:
-        super().__init__()
-        hidden_dim = config["model"]["hidden_dim"]
-        embedding_dim = config["model"]["embedding_dim"]
-        n_head = config["model"]["head"]
-        n_layer = config["model"]["n_layer"]
-        vocab_size = config["model"]["vocab_size"]
-        phoneme_vocab_size = config["model"]["phoneme_vocab_size"]
-        p_dropout = config["model"]["dropout"]
-        EOS = config["model"]["EOS"]
-        ffn_dim = hidden_dim * 4
-
-        self.n_layer = n_layer
-        self.hidden_dim = hidden_dim
-        self.n_head = n_head
-        assert hidden_dim % n_head == 0
-        self.head_dim = hidden_dim // n_head
-        self.embedding_dim = embedding_dim
-        self.vocab_size = vocab_size
-        self.phoneme_vocab_size = phoneme_vocab_size
-        self.p_dropout = p_dropout
-        self.max_seq_length = max_seq_length
-        self.max_batch_size = max_batch_size
-        self.EOS = EOS
-        assert self.EOS == self.vocab_size - 1
-
-        self.bert_proj = nn.Linear(1024, self.embedding_dim)
-        self.ar_text_embedding = TokenEmbedding(
-            self.embedding_dim, self.phoneme_vocab_size, self.p_dropout
-        )
-        self.ar_text_position = SinePositionalEmbedding(
-            self.embedding_dim,
-            dropout=0.1,
-            scale=False,
-            alpha=True,
-            max_batch_size=max_batch_size,
-            max_seq_len=max_seq_length,
-        )
-        self.ar_audio_embedding = TokenEmbedding(
-            self.embedding_dim, self.vocab_size, self.p_dropout
-        )
-        self.ar_audio_position = SinePositionalEmbedding(
-            self.embedding_dim,
-            dropout=0.1,
-            scale=False,
-            alpha=True,
-            max_batch_size=max_batch_size,
-            max_seq_len=max_seq_length,
-        )
-        self.ar_predict_layer = nn.Linear(self.hidden_dim, self.vocab_size, bias=False)
-        self.h = TransformerDecoder(
-            hidden_dim,
-            n_layer,
-            n_head,
-            ffn_dim,
-            vocab_size,
-            max_seq_length,
-            max_batch_size,
-        )
-
-        self._register_load_state_dict_pre_hook(self.load_hook)
-
-    def load_hook(self, state_dict, prefix, *args):
-        model_keys = [key for key in state_dict if key.startswith("model.")]
-        for key in model_keys:
-            new_key = key[len("model.") :]
-            state_dict[new_key] = state_dict.pop(key)
-
-    def init_cache(self, bsz: int = 0) -> nn.ModuleList:
-        bsz = bsz or self.h.max_batch_size
-        assert bsz <= self.h.max_batch_size
-        seq_lens = self.h.max_seq_length
-        device = self.bert_proj.bias.device
-        dtype = self.bert_proj.bias.dtype
-        return nn.ModuleList(
-            [
-                KVCacheNHD(bsz, seq_lens, self.n_head, self.head_dim)
-                for _ in range(self.n_layer)
-            ],
-        ).to(device, dtype)
-
-    def embed(
-        self,
-        x: List[torch.Tensor],
-        y: torch.Tensor,
-        bert_features: List[torch.Tensor],
-    ):
-        x_nested = torch.nested.nested_tensor(x)
-        assert x_nested.size(0) <= self.max_batch_size
-        bert_features_nested = torch.nested.nested_tensor(
-            list(map(lambda t: t.transpose(0, 1), bert_features))
-        )
-        x_emb = self.ar_text_embedding.forward(x_nested)
-        bert = self.bert_proj.forward(bert_features_nested)
-        x_emb = x_emb + bert
-        x_pos = self.ar_text_position.prefill(x_emb)
-
-        y_nested = torch.nested.nested_tensor(list(y.unbind(0)))
-        y_emb = self.ar_audio_embedding.forward(y_nested)
-        y_pos = self.ar_audio_position.prefill(y_emb)
-
-        xy_pos = torch.nested.nested_tensor(
-            [torch.cat([x_pos[i], y_pos[i]]) for i in range(len(x))]
-        )
-        return xy_pos
-
-    def capture(
-        self,
-        input_pos: Tensor,
-        x: Tensor,
-        x_dec: Tensor,
-        kv_caches,
-    ) -> CUDAGraph:
-        s = torch.cuda.Stream()
-        s.wait_stream(torch.cuda.current_stream())
-
-        graph = torch.cuda.CUDAGraph()
-
-        with torch.cuda.stream(s):
-            for _ in range(5):
-                self.h.forward(input_pos, x, kv_caches)
-        torch.cuda.current_stream().wait_stream(s)
-
-        with torch.cuda.graph(graph):
-            x_dec.copy_(self.h.forward(input_pos, x, kv_caches))
-        torch.cuda.synchronize()
-
-        return graph
-
-
-# ─── CUDA Graph Runner ───────────────────────────────────────────────────────
-
-
-class CUDAGraphRunner:
-    def __init__(
-        self,
-        decoder_model: T2SDecoder,
-        device: torch.device = torch.device("cpu"),
-        dtype: torch.dtype = torch.float32,
-    ) -> None:
-        assert device.type in {"cpu", "cuda", "mps", "xpu", "mtia"}
-        assert dtype in {torch.float16, torch.bfloat16, torch.float32}
-        self.device = device
-        self.dtype = dtype
-        self.decoder_model: T2SDecoder = decoder_model.to(self.device, self.dtype)
-        self.graph: Optional[CUDAGraph] = None
-        self.xy_pos_ = torch.rand(
-            (1, 1, decoder_model.embedding_dim), device=device
-        ).to(dtype)
-        self.xy_dec_ = torch.rand(
-            (1, 1, decoder_model.embedding_dim), device=device
-        ).to(dtype)
-        self.kv_cache = decoder_model.init_cache(1)
-        self.input_pos = torch.tensor([10]).int().cuda()
-
-    def _handle_request(self, request: T2SRequest):
-        with self.device:
-            for i in self.kv_cache:
-                i.empty()
-
-            decoder = self.decoder_model
-            session = T2SSession(decoder, request, device=self.device, dtype=self.dtype)
-            self.input_pos.copy_(session.input_pos)
-
-            t1 = 0.0
-            infer_speed = 0.0
-            y = session.y
-            bsz = y.size(0)
-
-            for idx in tqdm(range(1500)):
-                if idx == 0:
-                    xy_dec = decoder.h.prefill(
-                        session.xy_pos, session.attn_mask_nested, self.kv_cache
-                    )
-                    xy_dec = torch.stack([t[[-1]] for t in xy_dec.unbind()])
-                else:
-                    if (
-                        request.use_cuda_graph
-                        and self.graph is None
-                        and torch.cuda.is_available()
-                    ):
-                        self.xy_pos_.copy_(session.xy_pos)
-                        self.graph = decoder.capture(
-                            self.input_pos,
-                            self.xy_pos_,
-                            self.xy_dec_,
-                            kv_caches=self.kv_cache,
-                        )
-
-                    if self.graph:
-                        self.xy_pos_.copy_(session.xy_pos)
-                        self.graph.replay()
-                        xy_dec = self.xy_dec_.clone()
-                    else:
-                        xy_dec = decoder.h.forward(
-                            self.input_pos,
-                            session.xy_pos,
-                            self.kv_cache,
-                        )
-
-                logits = decoder.ar_predict_layer(xy_dec[:, -1])
-                self.input_pos.add_(1)
-
-                if idx == 0:
-                    logits[:, -1] = float("-inf")
-
-                samples = session.sampler.sample(
-                    logits=logits,
-                    previous_tokens=session.y,
-                    top_k=request.top_k,
-                    top_p=request.top_p,
-                    repetition_penalty=request.repetition_penalty,
-                    temperature=request.temperature,
-                )
-
-                session.y = torch.cat([session.y, samples], dim=1)
-
-                argmax_token = torch.argmax(logits, dim=-1)
-                sample_token = samples.squeeze(1)
-                EOS_mask = (argmax_token == decoder.EOS) | (
-                    sample_token == decoder.EOS
-                )
-
-                newly_done_mask = EOS_mask & (~session.completed)
-                newly_done_indices = newly_done_mask.nonzero()
-
-                if newly_done_indices.numel() > 0:
-                    session.y_results[newly_done_indices[0]] = session.y[
-                        newly_done_indices[0], session.y_len : -1
-                    ].squeeze(0)
-                    session.completed[newly_done_indices] = True
-
-                if torch.all(session.completed).item():
-                    if session.y.size(1) == 0:
-                        session.y = torch.cat(
-                            [session.y, torch.zeros_like(samples)], dim=1
-                        )
-                        tqdm.write("Bad Zero Prediction")
-                    else:
-                        tqdm.write(
-                            f"T2S Decoding EOS {session.prefill_len.tolist().__str__().strip('[]')} -> \n"
-                            f"{[i.size(0) for i in session.y_results].__str__().strip('[]')}"
-                        )
-                        tqdm.write(
-                            f"Infer Speed: {(idx - 1) / (time.perf_counter() - t1):.2f} token/s"
-                        )
-                        infer_speed = (idx - 1) / (time.perf_counter() - t1)
-                    break
-
-                if (
-                    request.early_stop_num != -1
-                    and (session.y.size(1) - session.y_len) > request.early_stop_num
-                ) or idx == 1499:
-                    for i in range(bsz):
-                        if not session.completed[i].item():
-                            session.y_results[i] = session.y[i, session.y_len :]
-                            session.completed[i] = True
-                    break
-
-                y_emb = decoder.ar_audio_embedding(session.y[:, -1:])
-                session.xy_pos = decoder.ar_audio_position.forward(
-                    self.input_pos - session.x_lens, y_emb
-                )
-
-                if idx == 2:
-                    t1 = time.perf_counter()
-
-                if idx % 100 == 0 and self.device.type == "cuda":
-                    torch.cuda.empty_cache()
-
-            if self.device.type == "cuda":
-                torch.cuda.empty_cache()
-
-            return session.y_results[: request.valid_length], infer_speed
-
-    def generate(self, request: T2SRequest) -> T2SResult:
-        try:
-            result, infer_speed = self._handle_request(request)
-            t2s_result = T2SResult(
-                result=result, infer_speed=infer_speed, status="Success"
-            )
-        except Exception as e:
-            t2s_result = T2SResult(
-                status="Error", exception=e, traceback=traceback.format_exc()
-            )
-        return t2s_result
-
-    @staticmethod
-    def load_decoder(weights_path, max_batch_size=1) -> T2SDecoder:
-        print(
-            f"Loading Text2Semantic Weights from {weights_path} with CUDA Graph (SDPA) Implement"
-        )
-        dict_s1 = torch.load(
-            weights_path, map_location="cpu", weights_only=False#, mmap=True
-        )
-        config = dict_s1["config"]
-        decoder = T2SDecoder(config, max_batch_size=max_batch_size)
-        state_dict = dict_s1["weight"]
-        decoder.load_state_dict(state_dict)
-        return decoder.eval()
--- a/GPT_SoVITS/AR/models/utils.py
+++ b/GPT_SoVITS/AR/models/utils.py
@ -262,7 +262,7 @@ def make_reject_y(y_o, y_lens):
    reject_y = []
    reject_y_lens = []
    for b in range(bs):
-        process_item_idx = torch.randint(0, 2, size=(1,))[0]
+        process_item_idx = torch.randint(0, 1, size=(1,))[0]
        if process_item_idx == 0:
            new_y = repeat_P(y_o[b])
            reject_y.append(new_y)
--- a/GPT_SoVITS/AR/modules/patched_mha_with_cache_onnx.py
+++ b/GPT_SoVITS/AR/modules/patched_mha_with_cache_onnx.py
@ -8,30 +8,30 @@ def multi_head_attention_forward_patched(
    query,
    key,
    value,
-    embed_dim_to_check,
-    num_heads,
+    embed_dim_to_check: int,
+    num_heads: int,
    in_proj_weight,
-    in_proj_bias,
-    bias_k,
-    bias_v,
-    add_zero_attn,
-    dropout_p,
-    out_proj_weight,
-    out_proj_bias,
-    training=True,
-    key_padding_mask=None,
-    need_weights=True,
-    attn_mask=None,
-    use_separate_proj_weight=False,
-    q_proj_weight=None,
-    k_proj_weight=None,
-    v_proj_weight=None,
-    static_k=None,
-    static_v=None,
-    average_attn_weights=True,
-    is_causal=False,
+    in_proj_bias: Optional[Tensor],
+    bias_k: Optional[Tensor],
+    bias_v: Optional[Tensor],
+    add_zero_attn: bool,
+    dropout_p: float,
+    out_proj_weight: Tensor,
+    out_proj_bias: Optional[Tensor],
+    training: bool = True,
+    key_padding_mask: Optional[Tensor] = None,
+    need_weights: bool = True,
+    attn_mask: Optional[Tensor] = None,
+    use_separate_proj_weight: bool = False,
+    q_proj_weight: Optional[Tensor] = None,
+    k_proj_weight: Optional[Tensor] = None,
+    v_proj_weight: Optional[Tensor] = None,
+    static_k: Optional[Tensor] = None,
+    static_v: Optional[Tensor] = None,
+    average_attn_weights: bool = True,
+    is_causal: bool = False,
    cache=None,
-):
+) -> Tuple[Tensor, Optional[Tensor]]:
    # set up shape vars
    _, _, embed_dim = query.shape
    attn_mask = _canonical_mask(
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@ -499,7 +499,7 @@ class TTS:

        if if_lora_v3 == True and os.path.exists(path_sovits) == False:
            info = path_sovits + i18n("SoVITS %s 底模缺失，无法加载相应 LoRA 权重" % model_version)
-            raise FileNotFoundError(info)
+            raise FileExistsError(info)

        # dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)
        dict_s2 = load_sovits_new(weights_path)
@ -1578,15 +1578,16 @@ class TTS:
                max_audio = np.abs(audio).max()
                if max_audio > 1:
                    audio /= max_audio
-                audio = (audio * 32768).astype(np.int16)
-            else:
-                audio = audio.cpu().numpy()
-                audio = (audio * 32768).astype(np.int16)
+            audio = (audio * 32768).astype(np.int16)
            t2 = time.perf_counter()
            print(f"超采样用时：{t2 - t1:.3f}s")
        else:
+            # audio = audio.float() * 32768
+            # audio = audio.to(dtype=torch.int16).clamp(-32768, 32767).cpu().numpy()
+
            audio = audio.cpu().numpy()
-            audio = (audio * 32768).astype(np.int16)
+
+        audio = (audio * 32768).astype(np.int16)


        # try:
@ -1767,10 +1768,7 @@ class TTS:
            pos += chunk_len * upsample_rate

        audio = self.sola_algorithm(audio_fragments, overlapped_len * upsample_rate)
-        if padding_len > 0:
-            audio = audio[overlapped_len * upsample_rate : -padding_len * upsample_rate]
-        else:
-            audio = audio[overlapped_len * upsample_rate :]
+        audio = audio[overlapped_len * upsample_rate : -padding_len * upsample_rate]

        audio_fragments = []
        for feat_len in feat_lens:
--- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
+++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
@ -92,7 +92,7 @@ def cut0(inp):
    if not set(inp).issubset(punctuation):
        return inp
    else:
-        return "\n"
+        return "/n"


 # 凑四句一切
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
--- a/GPT_SoVITS/module/distrib.py
+++ b/GPT_SoVITS/module/distrib.py
@ -87,7 +87,7 @@ def sync_buffer(buffers, average=True):
    for buffer, handle in handles:
        handle.wait()
        if average:
-            buffer.data /= world_size()
+            buffer.data /= world_size


 def sync_grad(params):
--- a/GPT_SoVITS/s2_train_v3_lora.py
+++ b/GPT_SoVITS/s2_train_v3_lora.py
@ -55,10 +55,6 @@ def main():
        n_gpus = torch.cuda.device_count()
    else:
        n_gpus = 1
-    if n_gpus <= 1:
-        run(0, n_gpus, hps)
-        return
-
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(randint(20000, 55555))

@ -81,14 +77,12 @@ def run(rank, n_gpus, hps):
        writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))

-    use_ddp = n_gpus > 1
-    if use_ddp:
-        dist.init_process_group(
-            backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
-            init_method="env://?use_libuv=False",
-            world_size=n_gpus,
-            rank=rank,
-        )
+    dist.init_process_group(
+        backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
+        init_method="env://?use_libuv=False",
+        world_size=n_gpus,
+        rank=rank,
+    )
    torch.manual_seed(hps.train.seed)
    if torch.cuda.is_available():
        torch.cuda.set_device(rank)
@ -124,20 +118,15 @@ def run(rank, n_gpus, hps):
        shuffle=True,
    )
    collate_fn = TextAudioSpeakerCollate()
-    worker_count = 0 if os.name == "nt" and n_gpus <= 1 else min(2 if os.name == "nt" else 5, os.cpu_count() or 1)
-    loader_kwargs = dict(
-        num_workers=worker_count,
-        shuffle=False,
-        pin_memory=torch.cuda.is_available(),
-        collate_fn=collate_fn,
-        batch_sampler=train_sampler,
-    )
-    if worker_count > 0:
-        loader_kwargs["persistent_workers"] = True
-        loader_kwargs["prefetch_factor"] = 2 if os.name == "nt" else 3
    train_loader = DataLoader(
        train_dataset,
-        **loader_kwargs,
+        num_workers=5,
+        shuffle=False,
+        pin_memory=True,
+        collate_fn=collate_fn,
+        batch_sampler=train_sampler,
+        persistent_workers=True,
+        prefetch_factor=3,
    )
    save_root = "%s/logs_s2_%s_lora_%s" % (hps.data.exp_dir, hps.model.version, hps.train.lora_rank)
    os.makedirs(save_root, exist_ok=True)
@ -167,9 +156,7 @@ def run(rank, n_gpus, hps):

    def model2cuda(net_g, rank):
        if torch.cuda.is_available():
-            net_g = net_g.cuda(rank)
-            if use_ddp:
-                net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
+            net_g = DDP(net_g.cuda(rank), device_ids=[rank], find_unused_parameters=True)
        else:
            net_g = net_g.to(device)
        return net_g
@ -255,8 +242,6 @@ def run(rank, n_gpus, hps):
                None,
            )
        scheduler_g.step()
-    if use_ddp and dist.is_initialized():
-        dist.destroy_process_group()
    print("training done")


--- a/GPT_SoVITS/text/chinese2.py
+++ b/GPT_SoVITS/text/chinese2.py
@ -180,15 +180,10 @@ def _merge_erhua(initials: list[str], finals: list[str], word: str, pos: str) ->
 def _g2p(segments):
    phones_list = []
    word2ph = []
-    g2pw_batch_results = []
-    g2pw_batch_cursor = 0
-    processed_segments = [re.sub("[a-zA-Z]+", "", seg) for seg in segments]
-    if is_g2pw:
-        batch_inputs = [seg for seg in processed_segments if seg]
-        g2pw_batch_results = g2pw._g2pw(batch_inputs) if batch_inputs else []
-
-    for seg in processed_segments:
+    for seg in segments:
        pinyins = []
+        # Replace all English words in the sentence
+        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
        initials = []
@ -209,10 +204,8 @@ def _g2p(segments):
            finals = sum(finals, [])
            print("pypinyin结果", initials, finals)
        else:
-            # g2pw采用整句推理（批量推理，逐句取结果）
-            if seg:
-                pinyins = g2pw_batch_results[g2pw_batch_cursor]
-                g2pw_batch_cursor += 1
+            # g2pw采用整句推理
+            pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)

            pre_word_length = 0
            for word, pos in seg_cut:
--- a/GPT_SoVITS/text/g2pw/dataset.py
+++ b/GPT_SoVITS/text/g2pw/dataset.py
@ -18,7 +18,6 @@ Credits

 from typing import Dict
 from typing import List
-from typing import Optional
 from typing import Tuple

 import numpy as np
@ -38,8 +37,6 @@ def prepare_onnx_input(
    use_mask: bool = False,
    window_size: int = None,
    max_len: int = 512,
-    char2id: Optional[Dict[str, int]] = None,
-    char_phoneme_masks: Optional[Dict[str, List[int]]] = None,
 ) -> Dict[str, np.array]:
    if window_size is not None:
        truncated_texts, truncated_query_ids = _truncate_texts(
@ -51,88 +48,33 @@ def prepare_onnx_input(
    phoneme_masks = []
    char_ids = []
    position_ids = []
-    tokenized_cache = {}
-
-    if char2id is None:
-        char2id = {char: idx for idx, char in enumerate(chars)}
-    if use_mask:
-        if char_phoneme_masks is None:
-            char_phoneme_masks = {
-                char: [1 if i in char2phonemes[char] else 0 for i in range(len(labels))]
-                for char in char2phonemes
-            }
-    else:
-        full_phoneme_mask = [1] * len(labels)

    for idx in range(len(texts)):
        text = (truncated_texts if window_size else texts)[idx].lower()
        query_id = (truncated_query_ids if window_size else query_ids)[idx]

-        cached = tokenized_cache.get(text)
-        if cached is None:
-            try:
-                tokens, text2token, token2text = tokenize_and_map(tokenizer=tokenizer, text=text)
-            except Exception:
-                print(f'warning: text "{text}" is invalid')
-                return {}
+        try:
+            tokens, text2token, token2text = tokenize_and_map(tokenizer=tokenizer, text=text)
+        except Exception:
+            print(f'warning: text "{text}" is invalid')
+            return {}

-            if len(tokens) <= max_len - 2:
-                processed_tokens = ["[CLS]"] + tokens + ["[SEP]"]
-                shared_input_id = list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens)))
-                shared_token_type_id = list(np.zeros((len(processed_tokens),), dtype=int))
-                shared_attention_mask = list(np.ones((len(processed_tokens),), dtype=int))
-                cached = {
-                    "is_short": True,
-                    "tokens": tokens,
-                    "text2token": text2token,
-                    "token2text": token2text,
-                    "input_id": shared_input_id,
-                    "token_type_id": shared_token_type_id,
-                    "attention_mask": shared_attention_mask,
-                }
-            else:
-                cached = {
-                    "is_short": False,
-                    "tokens": tokens,
-                    "text2token": text2token,
-                    "token2text": token2text,
-                }
-            tokenized_cache[text] = cached
+        text, query_id, tokens, text2token, token2text = _truncate(
+            max_len=max_len, text=text, query_id=query_id, tokens=tokens, text2token=text2token, token2text=token2text
+        )

-        if cached["is_short"]:
-            text_for_query = text
-            query_id_for_query = query_id
-            text2token_for_query = cached["text2token"]
-            input_id = cached["input_id"]
-            token_type_id = cached["token_type_id"]
-            attention_mask = cached["attention_mask"]
-        else:
-            (
-                text_for_query,
-                query_id_for_query,
-                tokens_for_query,
-                text2token_for_query,
-                _token2text_for_query,
-            ) = _truncate(
-                max_len=max_len,
-                text=text,
-                query_id=query_id,
-                tokens=cached["tokens"],
-                text2token=cached["text2token"],
-                token2text=cached["token2text"],
-            )
-            processed_tokens = ["[CLS]"] + tokens_for_query + ["[SEP]"]
-            input_id = list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens)))
-            token_type_id = list(np.zeros((len(processed_tokens),), dtype=int))
-            attention_mask = list(np.ones((len(processed_tokens),), dtype=int))
+        processed_tokens = ["[CLS]"] + tokens + ["[SEP]"]

-        query_char = text_for_query[query_id_for_query]
-        if use_mask:
-            phoneme_mask = char_phoneme_masks[query_char]
-        else:
-            phoneme_mask = full_phoneme_mask
-        char_id = char2id[query_char]
-        position_id = text2token_for_query[query_id_for_query] + 1  # [CLS] token locate at first place
+        input_id = list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens)))
+        token_type_id = list(np.zeros((len(processed_tokens),), dtype=int))
+        attention_mask = list(np.ones((len(processed_tokens),), dtype=int))
+
+        query_char = text[query_id]
+        phoneme_mask = (
+            [1 if i in char2phonemes[query_char] else 0 for i in range(len(labels))] if use_mask else [1] * len(labels)
+        )
+        char_id = chars.index(query_char)
+        position_id = text2token[query_id] + 1  # [CLS] token locate at first place

        input_ids.append(input_id)
        token_type_ids.append(token_type_id)
@ -141,15 +83,10 @@ def prepare_onnx_input(
        char_ids.append(char_id)
        position_ids.append(position_id)

-    max_token_length = max(len(seq) for seq in input_ids)
-
-    def _pad_sequences(sequences, pad_value=0):
-        return [seq + [pad_value] * (max_token_length - len(seq)) for seq in sequences]
-
    outputs = {
-        "input_ids": np.array(_pad_sequences(input_ids, pad_value=0)).astype(np.int64),
-        "token_type_ids": np.array(_pad_sequences(token_type_ids, pad_value=0)).astype(np.int64),
-        "attention_masks": np.array(_pad_sequences(attention_masks, pad_value=0)).astype(np.int64),
+        "input_ids": np.array(input_ids).astype(np.int64),
+        "token_type_ids": np.array(token_type_ids).astype(np.int64),
+        "attention_masks": np.array(attention_masks).astype(np.int64),
        "phoneme_masks": np.array(phoneme_masks).astype(np.float32),
        "char_ids": np.array(char_ids).astype(np.int64),
        "position_ids": np.array(position_ids).astype(np.int64),
--- a/GPT_SoVITS/text/g2pw/onnx_api.py
+++ b/GPT_SoVITS/text/g2pw/onnx_api.py
@ -10,6 +10,7 @@ from typing import Any, Dict, List, Tuple
 import numpy as np
 import onnxruntime
 import requests
+import torch
 from opencc import OpenCC
 from pypinyin import Style, pinyin
 from transformers.models.auto.tokenization_auto import AutoTokenizer
@ -21,8 +22,9 @@ from .utils import load_config
 onnxruntime.set_default_logger_severity(3)
 try:
    onnxruntime.preload_dlls()
-except Exception:
+except:
    pass
+    # traceback.print_exc()
 warnings.filterwarnings("ignore")

 model_version = "1.1"
@ -53,24 +55,6 @@ def predict(session, onnx_input: Dict[str, Any], labels: List[str]) -> Tuple[Lis
    return all_preds, all_confidences


-def _load_json_from_candidates(filename: str, candidate_dirs: List[str]) -> Dict[str, Any]:
-    for candidate_dir in candidate_dirs:
-        if not candidate_dir:
-            continue
-        json_path = os.path.join(candidate_dir, filename)
-        if os.path.exists(json_path):
-            with open(json_path, "r", encoding="utf-8") as fr:
-                return json.load(fr)
-    raise FileNotFoundError(f"Cannot locate {filename} in candidate dirs: {candidate_dirs}")
-
-
-def _find_first_existing_file(*paths: str) -> str:
-    for path in paths:
-        if path and os.path.exists(path):
-            return path
-    raise FileNotFoundError(f"Files not found: {paths}")
-
-
 def download_and_decompress(model_dir: str = "G2PWModel/"):
    if not os.path.exists(model_dir):
        parent_directory = os.path.dirname(model_dir)
@ -78,7 +62,7 @@ def download_and_decompress(model_dir: str = "G2PWModel/"):
        extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
        extract_dir_new = os.path.join(parent_directory, "G2PWModel")
        print("Downloading g2pw model...")
-        modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"
+        modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"  # "https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
        with requests.get(modelscope_url, stream=True) as r:
            r.raise_for_status()
            with open(zip_dir, "wb") as f:
@ -95,7 +79,7 @@ def download_and_decompress(model_dir: str = "G2PWModel/"):
    return model_dir


-class _G2PWBaseOnnxConverter:
+class G2PWOnnxConverter:
    def __init__(
        self,
        model_dir: str = "G2PWModel/",
@ -103,16 +87,33 @@ class _G2PWBaseOnnxConverter:
        model_source: str = None,
        enable_non_tradional_chinese: bool = False,
    ):
-        self.model_dir = download_and_decompress(model_dir)
-        self.config = load_config(config_path=os.path.join(self.model_dir, "config.py"), use_default=True)
+        uncompress_path = download_and_decompress(model_dir)
+
+        sess_options = onnxruntime.SessionOptions()
+        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+        sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
+        sess_options.intra_op_num_threads = 2 if torch.cuda.is_available() else 0
+        if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
+            self.session_g2pW = onnxruntime.InferenceSession(
+                os.path.join(uncompress_path, "g2pW.onnx"),
+                sess_options=sess_options,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+            )
+        else:
+            self.session_g2pW = onnxruntime.InferenceSession(
+                os.path.join(uncompress_path, "g2pW.onnx"),
+                sess_options=sess_options,
+                providers=["CPUExecutionProvider"],
+            )
+        self.config = load_config(config_path=os.path.join(uncompress_path, "config.py"), use_default=True)

        self.model_source = model_source if model_source else self.config.model_source
        self.enable_opencc = enable_non_tradional_chinese
+
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_source)

-        polyphonic_chars_path = os.path.join(self.model_dir, "POLYPHONIC_CHARS.txt")
-        monophonic_chars_path = os.path.join(self.model_dir, "MONOPHONIC_CHARS.txt")
-
+        polyphonic_chars_path = os.path.join(uncompress_path, "POLYPHONIC_CHARS.txt")
+        monophonic_chars_path = os.path.join(uncompress_path, "MONOPHONIC_CHARS.txt")
        self.polyphonic_chars = [
            line.split("\t") for line in open(polyphonic_chars_path, encoding="utf-8").read().strip().split("\n")
        ]
@ -148,47 +149,31 @@ class _G2PWBaseOnnxConverter:
        )

        self.chars = sorted(list(self.char2phonemes.keys()))
-        self.char2id = {char: idx for idx, char in enumerate(self.chars)}
-        self.char_phoneme_masks = (
-            {
-                char: [1 if i in self.char2phonemes[char] else 0 for i in range(len(self.labels))]
-                for char in self.char2phonemes
-            }
-            if self.config.use_mask
-            else None
-        )

        self.polyphonic_chars_new = set(self.chars)
        for char in self.non_polyphonic:
-            self.polyphonic_chars_new.discard(char)
+            if char in self.polyphonic_chars_new:
+                self.polyphonic_chars_new.remove(char)

        self.monophonic_chars_dict = {char: phoneme for char, phoneme in self.monophonic_chars}
        for char in self.non_monophonic:
-            self.monophonic_chars_dict.pop(char, None)
+            if char in self.monophonic_chars_dict:
+                self.monophonic_chars_dict.pop(char)

-        default_asset_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "G2PWModel"))
-        candidate_asset_dirs = [self.model_dir, default_asset_dir]
-        self.bopomofo_convert_dict = _load_json_from_candidates(
-            "bopomofo_to_pinyin_wo_tune_dict.json", candidate_asset_dirs
-        )
-        self.char_bopomofo_dict = _load_json_from_candidates("char_bopomofo_dict.json", candidate_asset_dirs)
+        self.pos_tags = ["UNK", "A", "C", "D", "I", "N", "P", "T", "V", "DE", "SHI"]

+        with open(os.path.join(uncompress_path, "bopomofo_to_pinyin_wo_tune_dict.json"), "r", encoding="utf-8") as fr:
+            self.bopomofo_convert_dict = json.load(fr)
        self.style_convert_func = {
            "bopomofo": lambda x: x,
            "pinyin": self._convert_bopomofo_to_pinyin,
        }[style]

+        with open(os.path.join(uncompress_path, "char_bopomofo_dict.json"), "r", encoding="utf-8") as fr:
+            self.char_bopomofo_dict = json.load(fr)
+
        if self.enable_opencc:
            self.cc = OpenCC("s2tw")
-        self.enable_sentence_dedup = os.getenv("g2pw_sentence_dedup", "true").strip().lower() in {
-            "1",
-            "true",
-            "yes",
-            "y",
-            "on",
-        }
-        # 聚焦到多音字附近上下文，默认左右各16字；设为0表示关闭裁剪（整句）。
-        self.polyphonic_context_chars = max(0, int(os.getenv("g2pw_polyphonic_context_chars", "16")))

    def _convert_bopomofo_to_pinyin(self, bopomofo: str) -> str:
        tone = bopomofo[-1]
@ -196,8 +181,9 @@ class _G2PWBaseOnnxConverter:
        component = self.bopomofo_convert_dict.get(bopomofo[:-1])
        if component:
            return component + tone
-        print(f'Warning: "{bopomofo}" cannot convert to pinyin')
-        return None
+        else:
+            print(f'Warning: "{bopomofo}" cannot convert to pinyin')
+            return None

    def __call__(self, sentences: List[str]) -> List[List[str]]:
        if isinstance(sentences, str):
@ -211,147 +197,51 @@ class _G2PWBaseOnnxConverter:
                translated_sentences.append(translated_sent)
            sentences = translated_sentences

-        texts, model_query_ids, result_query_ids, sent_ids, partial_results = self._prepare_data(sentences=sentences)
+        texts, query_ids, sent_ids, partial_results = self._prepare_data(sentences=sentences)
        if len(texts) == 0:
+            # sentences no polyphonic words
            return partial_results

-        model_input = prepare_onnx_input(
+        onnx_input = prepare_onnx_input(
            tokenizer=self.tokenizer,
            labels=self.labels,
            char2phonemes=self.char2phonemes,
            chars=self.chars,
            texts=texts,
-            query_ids=model_query_ids,
+            query_ids=query_ids,
            use_mask=self.config.use_mask,
            window_size=None,
-            char2id=self.char2id,
-            char_phoneme_masks=self.char_phoneme_masks,
        )

-        if not model_input:
-            return partial_results
-
-        if self.enable_sentence_dedup:
-            preds, _confidences = self._predict_with_sentence_dedup(model_input=model_input, texts=texts)
-        else:
-            preds, _confidences = self._predict(model_input=model_input)
-
+        preds, confidences = predict(session=self.session_g2pW, onnx_input=onnx_input, labels=self.labels)
        if self.config.use_char_phoneme:
            preds = [pred.split(" ")[1] for pred in preds]

        results = partial_results
-        for sent_id, query_id, pred in zip(sent_ids, result_query_ids, preds):
+        for sent_id, query_id, pred in zip(sent_ids, query_ids, preds):
            results[sent_id][query_id] = self.style_convert_func(pred)

        return results

-    def _prepare_data(
-        self, sentences: List[str]
-    ) -> Tuple[List[str], List[int], List[int], List[int], List[List[str]]]:
-        texts, model_query_ids, result_query_ids, sent_ids, partial_results = [], [], [], [], []
+    def _prepare_data(self, sentences: List[str]) -> Tuple[List[str], List[int], List[int], List[List[str]]]:
+        texts, query_ids, sent_ids, partial_results = [], [], [], []
        for sent_id, sent in enumerate(sentences):
+            # pypinyin works better for Simplified Chinese than for Traditional Chinese
            sent_s = tranditional_to_simplified(sent)
            pypinyin_result = pinyin(sent_s, neutral_tone_with_five=True, style=Style.TONE3)
            partial_result = [None] * len(sent)
-            polyphonic_indices: List[int] = []
            for i, char in enumerate(sent):
                if char in self.polyphonic_chars_new:
-                    polyphonic_indices.append(i)
+                    texts.append(sent)
+                    query_ids.append(i)
+                    sent_ids.append(sent_id)
                elif char in self.monophonic_chars_dict:
                    partial_result[i] = self.style_convert_func(self.monophonic_chars_dict[char])
                elif char in self.char_bopomofo_dict:
                    partial_result[i] = pypinyin_result[i][0]
+                    # partial_result[i] =  self.style_convert_func(self.char_bopomofo_dict[char][0])
                else:
                    partial_result[i] = pypinyin_result[i][0]

-            if polyphonic_indices:
-                if self.polyphonic_context_chars > 0:
-                    left = max(0, polyphonic_indices[0] - self.polyphonic_context_chars)
-                    right = min(len(sent), polyphonic_indices[-1] + self.polyphonic_context_chars + 1)
-                    sent_for_predict = sent[left:right]
-                    query_offset = left
-                else:
-                    sent_for_predict = sent
-                    query_offset = 0
-
-                for index in polyphonic_indices:
-                    texts.append(sent_for_predict)
-                    model_query_ids.append(index - query_offset)
-                    result_query_ids.append(index)
-                    sent_ids.append(sent_id)
-
            partial_results.append(partial_result)
-        return texts, model_query_ids, result_query_ids, sent_ids, partial_results
-
-    def _predict(self, model_input: Dict[str, Any]) -> Tuple[List[str], List[float]]:
-        raise NotImplementedError
-
-    def _predict_with_sentence_dedup(
-        self, model_input: Dict[str, Any], texts: List[str]
-    ) -> Tuple[List[str], List[float]]:
-        if len(texts) <= 1:
-            return self._predict(model_input=model_input)
-
-        grouped_indices: Dict[str, List[int]] = {}
-        for idx, text in enumerate(texts):
-            grouped_indices.setdefault(text, []).append(idx)
-
-        if all(len(indices) == 1 for indices in grouped_indices.values()):
-            return self._predict(model_input=model_input)
-
-        preds: List[str] = [""] * len(texts)
-        confidences: List[float] = [0.0] * len(texts)
-        for indices in grouped_indices.values():
-            group_input = {name: value[indices] for name, value in model_input.items()}
-            if len(indices) > 1:
-                for name in ("input_ids", "token_type_ids", "attention_masks"):
-                    group_input[name] = group_input[name][:1]
-
-            group_preds, group_confidences = self._predict(model_input=group_input)
-            for output_idx, pred, confidence in zip(indices, group_preds, group_confidences):
-                preds[output_idx] = pred
-                confidences[output_idx] = confidence
-
-        return preds, confidences
-
-
-class G2PWOnnxConverter(_G2PWBaseOnnxConverter):
-    def __init__(
-        self,
-        model_dir: str = "G2PWModel/",
-        style: str = "bopomofo",
-        model_source: str = None,
-        enable_non_tradional_chinese: bool = False,
-    ):
-        super().__init__(
-            model_dir=model_dir,
-            style=style,
-            model_source=model_source,
-            enable_non_tradional_chinese=enable_non_tradional_chinese,
-        )
-
-        sess_options = onnxruntime.SessionOptions()
-        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
-        sess_options.intra_op_num_threads = 2
-
-        onnx_path = _find_first_existing_file(
-            os.path.join(self.model_dir, "g2pW.onnx"),
-            os.path.join(self.model_dir, "g2pw.onnx"),
-        )
-
-        if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
-            self.session_g2pw = onnxruntime.InferenceSession(
-                onnx_path,
-                sess_options=sess_options,
-                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
-            )
-        else:
-            self.session_g2pw = onnxruntime.InferenceSession(
-                onnx_path,
-                sess_options=sess_options,
-                providers=["CPUExecutionProvider"],
-            )
-
-    def _predict(self, model_input: Dict[str, Any]) -> Tuple[List[str], List[float]]:
-        return predict(session=self.session_g2pw, onnx_input=model_input, labels=self.labels)
+        return texts, query_ids, sent_ids, partial_results
--- a/README.md
+++ b/README.md
@ -48,8 +48,6 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-

 请不要尬黑GPT-SoVITS推理速度慢，谢谢！

-CPU-Optimized Inference Version：https://github.com/baicai-1145/GPT-SoVITS-CPUFast
-
 **User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**

 ## Installation
--- a/docs/cn/Changelog_CN.md
+++ b/docs/cn/Changelog_CN.md
@ -594,11 +594,11 @@
  - 内容: 修复实验名结尾出现空格在win中路径不正确的问题
  - 类型: 修复
  - 提交: RVC-Boss
- 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
+- 2025.06.10 [Commit#746cb536](https://github.com/RVC-Boss/GPT-SoVITS/commit/746cb536c68b1fe6ce3ca7e882235375b8a8dd89)
  - 内容: 语种分割优化
  - 类型: 优化
  - 提交: KamioRinn
- 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
+- 2025.06.11 [Commit#dd2b9253](https://github.com/RVC-Boss/GPT-SoVITS/commit/dd2b9253aabb09db32db7a3344570ed9df043351)
  - 内容: 修复并行推理对v2pro支持bug
  - 类型: 修复
  - 提交: YYuX-1145
@ -606,132 +606,21 @@
  - 内容: v2pro对ge提取时会出现数值溢出的问题修复
  - 类型: 修复
  - 提交: RVC-Boss
- 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
+- 2025.06.11 [Commit#6fdc67ca](https://github.com/RVC-Boss/GPT-SoVITS/commit/6fdc67ca83418306f11e90b9139278313ac5c3e9) [Commit#37f5abfc](https://github.com/RVC-Boss/GPT-SoVITS/commit/37f5abfcb4a6553652235909db2e124b6f8ff3a5)
  - 内容: install.sh逻辑优化
  - 类型: 优化
  - 提交: XXXXRT666
- 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
+- 2025.06.27 [Commit#90ebefa7](https://github.com/RVC-Boss/GPT-SoVITS/commit/90ebefa78fd544da36eebe0b2003620879c921b0)
  - 内容: onnxruntime加载逻辑优化（对gpu/cpu的判断）
  - 类型: 优化
  - 提交: KamioRinn
- 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
+- 2025.06.27 [Commit#6df61f58](https://github.com/RVC-Boss/GPT-SoVITS/commit/6df61f58e4d18d4c2ad9d1eddd6a1bd690034c23)
  - 内容: 语言分割及格式化优化
  - 类型: 优化
  - 提交: KamioRinn
-
-## 202507
-
 - 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
  - 内容: 提升推理进程优先级（修复win11下可能GPU利用率受限的问题）
-  - 类型: 优化
+  - 类型: 修复
  - 提交: XianYue0125
- 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
-  - 内容: 解决 TTS.py 无法识别真正支持版本 v2Pro、v2ProPlus 的问题, 同时更新一版默认配置。
-  - 类型: 修复
-  - 提交: jiangsier-xyz
- 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
-  - 内容: 修复并行推理模式下v2pro模型识别问题
-  - 类型: 修复
-  - 提交: RVC-Boss
- 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
-  - 内容: whisper asr支持性价比更高的distill模型
-  - 类型: 优化
-  - 提交: XXXXRT666
- 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
-  - 内容: 优化TTS_Config的代码逻辑
-  - 类型: 优化
-  - 提交: ChasonJiang
- 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
-  - 内容: 修复gpt的loss计算问题
-  - 类型: 修复
-  - 提交: ChasonJiang

-## 202508

- 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
-  - 内容: WSL Rocm
-  - 类型: 修复
-  - 提交: XXXXRT666
-
-## 202509
-
- 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
-  - 内容: 修复环境变量可能不为str的问题
-  - 类型: 修复
-  - 提交: RVC-Boss
-
-## 202511
-
- 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
-  - 内容: 流式推理
-  - 类型: 新功能
-  - 提交: ChasonJiang
- 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
-  - 内容: 数学计算文本前端逻辑优化
-  - 类型: 优化
-  - 提交: KamioRinn
- 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
-  - 内容: 流式推理
-  - 类型: 新功能
-  - 提交: L-jasmine
- 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
-  - 内容: 支持vq分布式训练
-  - 类型: 优化
-  - 提交: wzy3650
- 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
-  - 内容: ASR模型下载逻辑优化
-  - 类型: 优化
-  - 提交: XXXXRT666
- 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
-  - 内容: default batch size bug 修复
-  - 类型: 修复
-  - 提交: Spr-Aachen
-
-## 202512
-
- 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
-  - 内容: 修复采样错误
-  - 类型: 修复
-  - 提交: ChasonJiang
-
-## 202602
-
- 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
-  - 内容: 修复 Conda 条款未同意导致的构建失败
-  - 类型: 修复
-  - 提交: Oarora
- 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
-  - 内容: 环境自动构建优化
-  - 类型: 优化
-  - 提交: XXXXRT666
-
-## 202604
-
- 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
-  - 内容: 优化 G2PW 的推理输入构造与多音字处理流程，减少重复计算，降低长句场景下的推理开销
-  - 类型: 优化
-  - 提交: baicai-1145
- 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
-  - 内容: 改进 Windows 单卡 v3 LoRA 训练流程
-  - 类型: 优化
-  - 提交: 2409324124
- 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
-  - 内容: 修复多个模块中的独立 bug
-  - 类型: 修复
-  - 提交: wishhyt
- 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
-  - 内容: 添加数据集的错误处理提示
-  - 类型: 优化
-  - 提交: mushroomcowisheggs
- 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
-  - 内容: 并行推理部分bug修复
-  - 类型: 修复
-  - 提交: wishhyt
- 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
-  - 内容: bug修复：DPO 训练不支持漏字模拟
-  - 类型: 修复
-  - 提交: Mr-Neutr0n
- 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
-  - 内容: 修复onnx脚本未导入Optional等的问题
-  - 类型: 修复
-  - 提交: RVC-Boss
--- a/docs/en/Changelog_EN.md
+++ b/docs/en/Changelog_EN.md
@ -578,160 +578,3 @@
  - Content: Optimized automatic precision detection logic; added collapsible functionality to WebUI frontend modules.
  - Type: New Feature
  - Contributors: XXXXRT666, RVC-Boss
- 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
-  - Content: Fix polyphone detection for "X一X" pattern
-  - Type: Fix
-  - Contributor: wzy3650
- 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
-  - Content: Config fix; fix SoVITS model loading
-  - Type: Fix
-  - Contributor: wzy3650
- 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
-  - Content: Fix possible numerical explosion of `ge.sum` causing silent inference
-  - Type: Fix
-  - Contributor: RVC-Boss
- 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
-  - Content: Fix incorrect Windows path when experiment name ends with a space
-  - Type: Fix
-  - Contributor: RVC-Boss
- 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
-  - Content: Optimize language segmentation
-  - Type: Optimization
-  - Contributor: KamioRinn
- 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
-  - Content: Fix bug in parallel inference support for v2pro
-  - Type: Fix
-  - Contributor: YYuX-1145
- 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
-  - Content: Fix numerical overflow issue when extracting `ge` for v2pro
-  - Type: Fix
-  - Contributor: RVC-Boss
- 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
-  - Content: Optimize `install.sh` logic
-  - Type: Optimization
-  - Contributor: XXXXRT666
- 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
-  - Content: Optimize onnxruntime loading logic (GPU/CPU detection)
-  - Type: Optimization
-  - Contributor: KamioRinn
- 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
-  - Content: Optimize language segmentation and formatting
-  - Type: Optimization
-  - Contributor: KamioRinn
-
-## 202507
-
- 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
-  - Content: Increase inference process priority (fix possible GPU utilization limitation on Win11)
-  - Type: Optimization
-  - Contributor: XianYue0125
- 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
-  - Content: Fix TTS.py not recognizing actually supported versions v2Pro and v2ProPlus, and update default configuration
-  - Type: Fix
-  - Contributor: jiangsier-xyz
- 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
-  - Content: Fix v2pro model recognition issue in parallel inference mode
-  - Type: Fix
-  - Contributor: RVC-Boss
- 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
-  - Content: Whisper ASR supports more cost-effective distill models
-  - Type: Optimization
-  - Contributor: XXXXRT666
- 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
-  - Content: Optimize `TTS_Config` code logic
-  - Type: Optimization
-  - Contributor: ChasonJiang
- 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
-  - Content: Fix GPT loss calculation issue
-  - Type: Fix
-  - Contributor: ChasonJiang
-
-## 202508
-
- 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
-  - Content: WSL Rocm
-  - Type: Fix
-  - Contributor: XXXXRT666
-
-## 202509
-
- 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
-  - Content: Fix issue where environment variable may not be a string
-  - Type: Fix
-  - Contributor: RVC-Boss
-
-## 202511
-
- 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
-  - Content: Streaming inference
-  - Type: New Feature
-  - Contributor: ChasonJiang
- 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
-  - Content: Optimize text frontend logic for mathematical expression text
-  - Type: Optimization
-  - Contributor: KamioRinn
- 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
-  - Content: Streaming inference
-  - Type: New Feature
-  - Contributor: L-jasmine
- 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
-  - Content: Support VQ distributed training
-  - Type: Optimization
-  - Contributor: wzy3650
- 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
-  - Content: Optimize ASR model download logic
-  - Type: Optimization
-  - Contributor: XXXXRT666
- 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
-  - Content: Fix default batch size bug
-  - Type: Fix
-  - Contributor: Spr-Aachen
-
-## 202512
-
- 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
-  - Content: Fix sampling error
-  - Type: Fix
-  - Contributor: ChasonJiang
-
-## 202602
-
- 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
-  - Content: Fix build failure caused by unaccepted Conda terms
-  - Type: Fix
-  - Contributor: Oarora
- 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
-  - Content: Optimize automatic environment setup
-  - Type: Optimization
-  - Contributor: XXXXRT666
-
-## 202604
-
- 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
-  - Content: Optimize G2PW inference input construction and polyphone handling to reduce redundant computation and inference overhead for long sentences
-  - Type: Optimization
-  - Contributor: baicai-1145
- 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
-  - Content: Improve the LoRA training flow for GPT-SoVITS v3 on a single card under Windows
-  - Type: Optimization
-  - Contributor: 2409324124
- 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
-  - Content: Fix miscellaneous bugs in multiple modules
-  - Type: Fix
-  - Contributor: wishhyt
- 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
-  - Content: Add error handling hints for dataset processing
-  - Type: Optimization
-  - Contributor: mushroomcowisheggs
- 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
-  - Content: Fix some bugs in parallel inference
-  - Type: Fix
-  - Contributor: wishhyt
- 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
-  - Content: Fix bug where DPO training does not support missing word simulation
-  - Type: Fix
-  - Contributor: Mr-Neutr0n
- 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
-  - Content: Fix missing imports (e.g., Optional) in ONNX script
-  - Type: Fix
-  - Contributor: RVC-Boss
--- a/docs/ja/Changelog_JA.md
+++ b/docs/ja/Changelog_JA.md
@ -578,160 +578,3 @@
  - 内容: 自動精度検出ロジックを最適化し、WebUI フロントエンドモジュールに折り畳み（Collapsible）機能を追加
  - タイプ: 新機能
  - 貢献者: XXXXRT666, RVC-Boss
- 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
-  - 内容: 「X一X」パターンの多音字検出を修正
-  - タイプ: 修正
-  - 貢献者: wzy3650
- 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
-  - 内容: 設定の修正；SoVITSモデル読み込みの修正
-  - タイプ: 修正
-  - 貢献者: wzy3650
- 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
-  - 内容: `ge.sum`の数値爆発による推論の無音化を修正
-  - タイプ: 修正
-  - 貢献者: RVC-Boss
- 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
-  - 内容: 実験名がスペースで終わる場合のWindowsパスの誤りを修正
-  - タイプ: 修正
-  - 貢献者: RVC-Boss
- 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
-  - 内容: 言語分割の最適化
-  - タイプ: 最適化
-  - 貢献者: KamioRinn
- 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
-  - 内容: v2proの並列推論対応におけるバグを修正
-  - タイプ: 修正
-  - 貢献者: YYuX-1145
- 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
-  - 内容: v2proの`ge`抽出時の数値オーバーフロー問題を修正
-  - タイプ: 修正
-  - 貢献者: RVC-Boss
- 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
-  - 内容: `install.sh`のロジックを最適化
-  - タイプ: 最適化
-  - 貢献者: XXXXRT666
- 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
-  - 内容: onnxruntime読み込みロジックを最適化（GPU/CPU検出）
-  - タイプ: 最適化
-  - 貢献者: KamioRinn
- 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
-  - 内容: 言語分割と書式を最適化
-  - タイプ: 最適化
-  - 貢献者: KamioRinn
-
-## 202507
-
- 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
-  - 内容: 推論プロセスの優先度を上げる（Win11でのGPU利用制限の可能性を修正）
-  - タイプ: 最適化
-  - 貢献者: XianYue0125
- 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
-  - 内容: TTS.pyが実際にサポートされているバージョンv2Proおよびv2ProPlusを認識しない問題を修正し、デフォルト設定を更新
-  - タイプ: 修正
-  - 貢献者: jiangsier-xyz
- 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
-  - 内容: 並列推論モードでのv2proモデル認識問題を修正
-  - タイプ: 修正
-  - 貢献者: RVC-Boss
- 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
-  - 内容: Whisper ASRがよりコスト効率の高い蒸留モデルをサポート
-  - タイプ: 最適化
-  - 貢献者: XXXXRT666
- 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
-  - 内容: `TTS_Config`のコードロジックを最適化
-  - タイプ: 最適化
-  - 貢献者: ChasonJiang
- 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
-  - 内容: GPT損失計算の問題を修正
-  - タイプ: 修正
-  - 貢献者: ChasonJiang
-
-## 202508
-
- 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
-  - 内容: WSL Rocm対応
-  - タイプ: 修正
-  - 貢献者: XXXXRT666
-
-## 202509
-
- 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
-  - 内容: 環境変数が文字列でない可能性がある問題を修正
-  - タイプ: 修正
-  - 貢献者: RVC-Boss
-
-## 202511
-
- 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
-  - 内容: ストリーミング推論
-  - タイプ: 新機能
-  - 貢献者: ChasonJiang
- 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
-  - 内容: 数式テキストに対するテキスト前処理ロジックを最適化
-  - タイプ: 最適化
-  - 貢献者: KamioRinn
- 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
-  - 内容: ストリーミング推論
-  - タイプ: 新機能
-  - 貢献者: L-jasmine
- 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
-  - 内容: VQ分散学習をサポート
-  - タイプ: 最適化
-  - 貢献者: wzy3650
- 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
-  - 内容: ASRモデルダウンロードロジックを最適化
-  - タイプ: 最適化
-  - 貢献者: XXXXRT666
- 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
-  - 内容: デフォルトのバッチサイズのバグを修正
-  - タイプ: 修正
-  - 貢献者: Spr-Aachen
-
-## 202512
-
- 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
-  - 内容: サンプリングエラーを修正
-  - タイプ: 修正
-  - 貢献者: ChasonJiang
-
-## 202602
-
- 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
-  - 内容: 受け入れられなかったConda利用規約によるビルド失敗を修正
-  - タイプ: 修正
-  - 貢献者: Oarora
- 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
-  - 内容: 自動環境セットアップを最適化
-  - タイプ: 最適化
-  - 貢献者: XXXXRT666
-
-## 202604
-
- 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
-  - 内容: G2PW推論入力の構築と多音字処理を最適化し、長文における冗長な計算と推論オーバーヘッドを削減
-  - タイプ: 最適化
-  - 貢献者: baicai-1145
- 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
-  - 内容: WindowsでのシングルカードにおけるGPT-SoVITS v3のLoRAトレーニングフローを改善
-  - タイプ: 最適化
-  - 貢献者: 2409324124
- 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
-  - 内容: 複数モジュールの雑多なバグを修正
-  - タイプ: 修正
-  - 貢献者: wishhyt
- 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
-  - 内容: データセット処理時のエラーハンドリングヒントを追加
-  - タイプ: 最適化
-  - 貢献者: mushroomcowisheggs
- 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
-  - 内容: 並列推論の一部バグを修正
-  - タイプ: 修正
-  - 貢献者: wishhyt
- 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
-  - 内容: DPOトレーニングが欠落単語シミュレーションをサポートしないバグを修正
-  - タイプ: 修正
-  - 貢献者: Mr-Neutr0n
- 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
-  - 内容: ONNXスクリプトでの（Optionalなどの）不足インポートを修正
-  - タイプ: 修正
-  - 貢献者: RVC-Boss
--- a/docs/ko/Changelog_KO.md
+++ b/docs/ko/Changelog_KO.md
@ -578,160 +578,3 @@
  - 내용: 자동 정밀도 감지 로직 최적화; WebUI 프론트엔드 모듈에 접기 기능 추가
  - 유형: 신규 기능
  - 기여자: XXXXRT666, RVC-Boss
- 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
-  - 내용: "X一X" 패턴의 다중 발음 감지 오류 수정
-  - 유형: 수정
-  - 기여자: wzy3650
- 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
-  - 내용: 설정 오류 수정; SoVITS 모델 로딩 오류 수정
-  - 유형: 수정
-  - 기여자: wzy3650
- 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
-  - 내용: `ge.sum`의 수치 폭발 가능성으로 인한 추론 무음 현상 수정
-  - 유형: 수정
-  - 기여자: RVC-Boss
- 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
-  - 내용: 실험 이름이 공백으로 끝날 때 발생하는 잘못된 Windows 경로 문제 수정
-  - 유형: 수정
-  - 기여자: RVC-Boss
- 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
-  - 내용: 언어 분할 최적화
-  - 유형: 최적화
-  - 기여자: KamioRinn
- 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
-  - 내용: v2pro 병렬 추론 지원 버그 수정
-  - 유형: 수정
-  - 기여자: YYuX-1145
- 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
-  - 내용: v2pro의 `ge` 추출 시 수치 오버플로우 문제 수정
-  - 유형: 수정
-  - 기여자: RVC-Boss
- 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
-  - 내용: `install.sh` 로직 최적화
-  - 유형: 최적화
-  - 기여자: XXXXRT666
- 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
-  - 내용: onnxruntime 로딩 로직 최적화 (GPU/CPU 감지)
-  - 유형: 최적화
-  - 기여자: KamioRinn
- 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
-  - 내용: 언어 분할 및 형식 최적화
-  - 유형: 최적화
-  - 기여자: KamioRinn
-
-## 202507
-
- 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
-  - 내용: 추론 프로세스 우선순위 증가 (Win11에서 GPU 활용 제한 가능성 수정)
-  - 유형: 최적화
-  - 기여자: XianYue0125
- 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
-  - 내용: TTS.py가 실제 지원되는 버전 v2Pro 및 v2ProPlus를 인식하지 못하는 문제 수정 및 기본 설정 업데이트
-  - 유형: 수정
-  - 기여자: jiangsier-xyz
- 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
-  - 내용: 병렬 추론 모드에서 v2pro 모델 인식 문제 수정
-  - 유형: 수정
-  - 기여자: RVC-Boss
- 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
-  - 내용: Whisper ASR이 더 비용 효율적인 distill 모델 지원
-  - 유형: 최적화
-  - 기여자: XXXXRT666
- 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
-  - 내용: `TTS_Config` 코드 로직 최적화
-  - 유형: 최적화
-  - 기여자: ChasonJiang
- 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
-  - 내용: GPT 손실(loss) 계산 문제 수정
-  - 유형: 수정
-  - 기여자: ChasonJiang
-
-## 202508
-
- 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
-  - 내용: WSL Rocm
-  - 유형: 수정
-  - 기여자: XXXXRT666
-
-## 202509
-
- 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
-  - 내용: 환경 변수가 문자열이 아닐 수 있는 문제 수정
-  - 유형: 수정
-  - 기여자: RVC-Boss
-
-## 202511
-
- 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
-  - 내용: 스트리밍 추론
-  - 유형: 새 기능
-  - 기여자: ChasonJiang
- 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
-  - 내용: 수학 표현식 텍스트에 대한 텍스트 전처리 로직 최적화
-  - 유형: 최적화
-  - 기여자: KamioRinn
- 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
-  - 내용: 스트리밍 추론
-  - 유형: 새 기능
-  - 기여자: L-jasmine
- 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
-  - 내용: VQ 분산 학습 지원
-  - 유형: 최적화
-  - 기여자: wzy3650
- 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
-  - 내용: ASR 모델 다운로드 로직 최적화
-  - 유형: 최적화
-  - 기여자: XXXXRT666
- 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
-  - 내용: 기본 배치 크기 버그 수정
-  - 유형: 수정
-  - 기여자: Spr-Aachen
-
-## 202512
-
- 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
-  - 내용: 샘플링 오류 수정
-  - 유형: 수정
-  - 기여자: ChasonJiang
-
-## 202602
-
- 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
-  - 내용: Conda 약관 미동의로 인한 빌드 실패 수정
-  - 유형: 수정
-  - 기여자: Oarora
- 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
-  - 내용: 자동 환경 설정 최적화
-  - 유형: 최적화
-  - 기여자: XXXXRT666
-
-## 202604
-
- 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
-  - 내용: G2PW 추론 입력 구성 및 다중 발음 처리를 최적화하여 긴 문장에 대한 중복 계산 및 추론 오버헤드 감소
-  - 유형: 최적화
-  - 기여자: baicai-1145
- 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
-  - 내용: Windows 환경 단일 GPU에서 GPT-SoVITS v3의 LoRA 학습 흐름 개선
-  - 유형: 최적화
-  - 기여자: 2409324124
- 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
-  - 내용: 여러 모듈의 잡다한 버그 수정
-  - 유형: 수정
-  - 기여자: wishhyt
- 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
-  - 내용: 데이터셋 처리를 위한 오류 처리 힌트 추가
-  - 유형: 최적화
-  - 기여자: mushroomcowisheggs
- 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
-  - 내용: 병렬 추론의 일부 버그 수정
-  - 유형: 수정
-  - 기여자: wishhyt
- 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
-  - 내용: DPO 학습이 누락 단어 시뮬레이션을 지원하지 않는 버그 수정
-  - 유형: 수정
-  - 기여자: Mr-Neutr0n
- 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
-  - 내용: ONNX 스크립트에서 Optional 등 누락된 임포트 문제 수정
-  - 유형: 수정
-  - 기여자: RVC-Boss
--- a/docs/tr/Changelog_TR.md
+++ b/docs/tr/Changelog_TR.md
@ -2,6 +2,8 @@

 ## 202401

+## 202401
+
 - 2024.01.21 [PR#108](https://github.com/RVC-Boss/GPT-SoVITS/pull/108)
  - İçerik: WebUI'ya İngilizce sistem çeviri desteği eklendi.
  - Tür: Dokümantasyon
@ -330,8 +332,6 @@
  - Tür: Optimizasyon
  - Katkıda Bulunan: RVC-Boss, GoHomeToMacDonal
  - İlgili: [PR#672](https://github.com/RVC-Boss/GPT-SoVITS/pull/672)
- Gelecek güncellemeler, `fast_inference` dalındaki değişikliklerin tutarlılığını doğrulamaya devam edecek.
-
 - 2024.07.13 [PR#1294](https://github.com/RVC-Boss/GPT-SoVITS/pull/1294), [PR#1298](https://github.com/RVC-Boss/GPT-SoVITS/pull/1298)
  - İçerik: i18n taraması yeniden düzenlendi ve çok dilli yapılandırma dosyaları güncellendi
  - Tür: Dokümantasyon
@ -578,160 +578,3 @@
  - İçerik: Otomatik hassasiyet algılama mantığı optimize edildi; WebUI önyüz modüllerine katlanabilir özellik eklendi
  - Tür: Yeni Özellik
  - Katkıda Bulunanlar: XXXXRT666, RVC-Boss
- 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
-  - İçerik: "X一X" kalıbı için çok sesli harf tespitini düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: wzy3650
- 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
-  - İçerik: Yapılandırma düzeltmesi; SoVITS model yüklemesini düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: wzy3650
- 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
-  - İçerik: `ge.sum` kaynaklı olası sayısal patlamayı (sessiz çıkarıma yol açan) düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: RVC-Boss
- 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
-  - İçerik: Deney adı boşlukla bittiğinde oluşan hatalı Windows yolunu düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: RVC-Boss
- 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
-  - İçerik: Dil bölütlemeyi optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: KamioRinn
- 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
-  - İçerik: v2pro için paralel çıkarım desteğindeki hatayı düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: YYuX-1145
- 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
-  - İçerik: v2pro için `ge` çıkarımındaki sayısal taşma sorununu düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: RVC-Boss
- 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
-  - İçerik: `install.sh` mantığını optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: XXXXRT666
- 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
-  - İçerik: onnxruntime yükleme mantığını optimize et (GPU/CPU algılama)
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: KamioRinn
- 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
-  - İçerik: Dil bölütleme ve biçimlendirmeyi optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: KamioRinn
-
-## 202507
-
- 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
-  - İçerik: Çıkarım işlem önceliğini artır (Win11'de olası GPU kullanım sınırlamasını düzelt)
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: XianYue0125
- 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
-  - İçerik: TTS.py'nin gerçekte desteklenen sürümler olan v2Pro ve v2ProPlus'ı tanımaması sorununu düzelt ve varsayılan yapılandırmayı güncelle
-  - Tür: Düzeltme
-  - Katkıda Bulunan: jiangsier-xyz
- 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
-  - İçerik: Paralel çıkarım modunda v2pro model tanıma sorununu düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: RVC-Boss
- 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
-  - İçerik: Whisper ASR daha uygun maliyetli distill modellerini destekler
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: XXXXRT666
- 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
-  - İçerik: `TTS_Config` kod mantığını optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: ChasonJiang
- 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
-  - İçerik: GPT kayıp (loss) hesaplama sorununu düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: ChasonJiang
-
-## 202508
-
- 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
-  - İçerik: WSL Rocm
-  - Tür: Düzeltme
-  - Katkıda Bulunan: XXXXRT666
-
-## 202509
-
- 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
-  - İçerik: Ortam değişkeninin dize (string) olmaması sorununu düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: RVC-Boss
-
-## 202511
-
- 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
-  - İçerik: Akışlı çıkarım (streaming inference)
-  - Tür: Yeni Özellik
-  - Katkıda Bulunan: ChasonJiang
- 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
-  - İçerik: Matematiksel ifade metinleri için metin ön uç (frontend) mantığını optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: KamioRinn
- 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
-  - İçerik: Akışlı çıkarım (streaming inference)
-  - Tür: Yeni Özellik
-  - Katkıda Bulunan: L-jasmine
- 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
-  - İçerik: VQ dağıtılmış eğitimi destekle
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: wzy3650
- 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
-  - İçerik: ASR model indirme mantığını optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: XXXXRT666
- 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
-  - İçerik: Varsayılan parti boyutu (batch size) hatasını düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: Spr-Aachen
-
-## 202512
-
- 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
-  - İçerik: Örnekleme (sampling) hatasını düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: ChasonJiang
-
-## 202602
-
- 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
-  - İçerik: Kabul edilmeyen Conda koşullarının neden olduğu derleme hatasını düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: Oarora
- 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
-  - İçerik: Otomatik ortam kurulumunu optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: XXXXRT666
-
-# 202604
-
- 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
-  - İçerik: Uzun cümlelerde gereksiz hesaplama ve çıkarım yükünü azaltmak için G2PW çıkarım girdi oluşturmayı ve çok sesli harf işlemeyi optimize et
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: baicai-1145
- 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
-  - İçerik: Windows altında tek kartta GPT-SoVITS v3 için LoRA eğitim akışını iyileştir
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: 2409324124
- 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
-  - İçerik: Birden çok modüldeki çeşitli hataları düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: wishhyt
- 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
-  - İçerik: Veri kümesi işleme için hata işleme ipuçları ekle
-  - Tür: Optimizasyon
-  - Katkıda Bulunan: mushroomcowisheggs
- 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
-  - İçerik: Paralel çıkarımdaki bazı hataları düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: wishhyt
- 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
-  - İçerik: DPO eğitiminin eksik kelime simülasyonunu desteklememe hatasını düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: Mr-Neutr0n
- 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
-  - İçerik: ONNX betiğinde (Optional vb.) eksik içe aktarmaları düzelt
-  - Tür: Düzeltme
-  - Katkıda Bulunan: RVC-Boss
--- a/tools/asr/funasr_asr.py
+++ b/tools/asr/funasr_asr.py
@ -39,7 +39,6 @@ def create_model(language="zh"):
            local_dir="tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
        )
        model_revision = "v2.0.4"
-        vad_model_revision = punc_model_revision = "v2.0.4"
    elif language == "yue":
        path_asr = "tools/asr/models/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online"
        snapshot_download(
@ -52,6 +51,8 @@ def create_model(language="zh"):
    else:
        raise ValueError(f"{language} is not supported")

+    vad_model_revision = punc_model_revision = "v2.0.4"
+
    if language in funasr_models:
        return funasr_models[language]
    else:
--- a/tools/uvr5/lib/lib_v5/spec_utils.py
+++ b/tools/uvr5/lib/lib_v5/spec_utils.py
@ -485,8 +485,6 @@ def istft(spec, hl):
    wave_right = librosa.istft(spec_right, hop_length=hl)
    wave = np.asfortranarray([wave_left, wave_right])

-    return wave
-

 if __name__ == "__main__":
    import argparse