Merge c93bf48785aba24ee47a268f1eb5dcee1b8866dd into ea2d2a81667239d37615697e8f0056e35bab2db6

Update README.md
Update Documentation (#2768)
2026-04-29 21:00:42 +08:00 · 2026-04-20 10:26:26 +00:00 · 2026-04-19 21:02:57 +08:00 · 2026-04-18 22:33:55 +08:00 · 2026-04-18 19:01:11 +08:00 · 2026-04-18 17:33:53 +08:00
20 changed files with 2932 additions and 142 deletions
--- a/GPT_SoVITS/AR/data/dataset.py
+++ b/GPT_SoVITS/AR/data/dataset.py
@ -67,8 +67,10 @@ class Text2SemanticDataset(Dataset):
            )
        )  # "%s/3-bert"%exp_dir#bert_dir
        self.path6 = semantic_path  # "%s/6-name2semantic.tsv"%exp_dir#semantic_path
-        assert os.path.exists(self.path2)
+        if not os.path.exists(self.path2):
-        assert os.path.exists(self.path6)
+            raise FileNotFoundError(f"Phoneme data file not found: {self.path2}")
        if not os.path.exists(self.path6):
            raise FileNotFoundError(f"Semantic data file not found: {self.path6}")
        self.phoneme_data = {}
        with open(self.path2, "r", encoding="utf8") as f:
            lines = f.read().strip("\n").split("\n")
@ -131,7 +133,7 @@ class Text2SemanticDataset(Dataset):
                phoneme, word2ph, text = self.phoneme_data[item_name]
            except Exception:
                traceback.print_exc()
-                # print(f"{item_name} not in self.phoneme_data !")
+                print(f"Warning: File \"{item_name}\" not in self.phoneme_data! Skipped. ")
                num_not_in += 1
                continue
@ -152,7 +154,7 @@ class Text2SemanticDataset(Dataset):
                phoneme_ids = cleaned_text_to_sequence(phoneme, version)
            except:
                traceback.print_exc()
-                # print(f"{item_name} not in self.phoneme_data !")
+                print(f"Warning: Failed to convert phonemes to sequence for file \"{item_name}\"! Skipped. ")
                num_not_in += 1
                continue
            # if len(phoneme_ids) >400:###########2：改为恒定限制为semantic/2.5就行
@ -228,7 +230,11 @@ class Text2SemanticDataset(Dataset):
            # bert_feature=torch.zeros_like(phoneme_ids,dtype=torch.float32)
            bert_feature = None
        else:
-            assert bert_feature.shape[-1] == len(phoneme_ids)
+            try:
                assert bert_feature.shape[-1] == len(phoneme_ids)
            except AssertionError:
                print(f"AssertionError: The BERT feature dimension ({bert_feature.shape[-1]}) of the file '{item_name}' does not match the length of the phoneme sequence ({len(phoneme_ids)}).")
                raise
        return {
            "idx": idx,
            "phoneme_ids": phoneme_ids,
--- a/GPT_SoVITS/AR/models/utils.py
+++ b/GPT_SoVITS/AR/models/utils.py
@ -262,7 +262,7 @@ def make_reject_y(y_o, y_lens):
    reject_y = []
    reject_y_lens = []
    for b in range(bs):
-        process_item_idx = torch.randint(0, 1, size=(1,))[0]
+        process_item_idx = torch.randint(0, 2, size=(1,))[0]
        if process_item_idx == 0:
            new_y = repeat_P(y_o[b])
            reject_y.append(new_y)
--- a/GPT_SoVITS/AR/modules/patched_mha_with_cache_onnx.py
+++ b/GPT_SoVITS/AR/modules/patched_mha_with_cache_onnx.py
@ -8,30 +8,30 @@ def multi_head_attention_forward_patched(
    query,
    key,
    value,
-    embed_dim_to_check: int,
+    embed_dim_to_check,
-    num_heads: int,
+    num_heads,
    in_proj_weight,
-    in_proj_bias: Optional[Tensor],
+    in_proj_bias,
-    bias_k: Optional[Tensor],
+    bias_k,
-    bias_v: Optional[Tensor],
+    bias_v,
-    add_zero_attn: bool,
+    add_zero_attn,
-    dropout_p: float,
+    dropout_p,
-    out_proj_weight: Tensor,
+    out_proj_weight,
-    out_proj_bias: Optional[Tensor],
+    out_proj_bias,
-    training: bool = True,
+    training=True,
-    key_padding_mask: Optional[Tensor] = None,
+    key_padding_mask=None,
-    need_weights: bool = True,
+    need_weights=True,
-    attn_mask: Optional[Tensor] = None,
+    attn_mask=None,
-    use_separate_proj_weight: bool = False,
+    use_separate_proj_weight=False,
-    q_proj_weight: Optional[Tensor] = None,
+    q_proj_weight=None,
-    k_proj_weight: Optional[Tensor] = None,
+    k_proj_weight=None,
-    v_proj_weight: Optional[Tensor] = None,
+    v_proj_weight=None,
-    static_k: Optional[Tensor] = None,
+    static_k=None,
-    static_v: Optional[Tensor] = None,
+    static_v=None,
-    average_attn_weights: bool = True,
+    average_attn_weights=True,
-    is_causal: bool = False,
+    is_causal=False,
    cache=None,
-) -> Tuple[Tensor, Optional[Tensor]]:
+):
    # set up shape vars
    _, _, embed_dim = query.shape
    attn_mask = _canonical_mask(
--- a/GPT_SoVITS/TTS_infer_pack/TTS.py
+++ b/GPT_SoVITS/TTS_infer_pack/TTS.py
@ -499,7 +499,7 @@ class TTS:
        if if_lora_v3 == True and os.path.exists(path_sovits) == False:
            info = path_sovits + i18n("SoVITS %s 底模缺失，无法加载相应 LoRA 权重" % model_version)
-            raise FileExistsError(info)
+            raise FileNotFoundError(info)
        # dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)
        dict_s2 = load_sovits_new(weights_path)
@ -1578,16 +1578,15 @@ class TTS:
                max_audio = np.abs(audio).max()
                if max_audio > 1:
                    audio /= max_audio
-            audio = (audio * 32768).astype(np.int16)
+                audio = (audio * 32768).astype(np.int16)
            else:
                audio = audio.cpu().numpy()
                audio = (audio * 32768).astype(np.int16)
            t2 = time.perf_counter()
            print(f"超采样用时：{t2 - t1:.3f}s")
        else:
            # audio = audio.float() * 32768
            # audio = audio.to(dtype=torch.int16).clamp(-32768, 32767).cpu().numpy()
            audio = audio.cpu().numpy()
-
+            audio = (audio * 32768).astype(np.int16)
        audio = (audio * 32768).astype(np.int16)
        # try:
@ -1768,7 +1767,10 @@ class TTS:
            pos += chunk_len * upsample_rate
        audio = self.sola_algorithm(audio_fragments, overlapped_len * upsample_rate)
-        audio = audio[overlapped_len * upsample_rate : -padding_len * upsample_rate]
+        if padding_len > 0:
            audio = audio[overlapped_len * upsample_rate : -padding_len * upsample_rate]
        else:
            audio = audio[overlapped_len * upsample_rate :]
        audio_fragments = []
        for feat_len in feat_lens:
--- a/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
+++ b/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
@ -92,7 +92,7 @@ def cut0(inp):
    if not set(inp).issubset(punctuation):
        return inp
    else:
-        return "/n"
+        return "\n"
 # 凑四句一切
--- a/GPT_SoVITS/module/distrib.py
+++ b/GPT_SoVITS/module/distrib.py
@ -87,7 +87,7 @@ def sync_buffer(buffers, average=True):
    for buffer, handle in handles:
        handle.wait()
        if average:
-            buffer.data /= world_size
+            buffer.data /= world_size()
 def sync_grad(params):
--- a/GPT_SoVITS/s2_train_v3_lora.py
+++ b/GPT_SoVITS/s2_train_v3_lora.py
@ -55,6 +55,10 @@ def main():
        n_gpus = torch.cuda.device_count()
    else:
        n_gpus = 1
    if n_gpus <= 1:
        run(0, n_gpus, hps)
        return
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(randint(20000, 55555))
@ -77,12 +81,14 @@ def run(rank, n_gpus, hps):
        writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
-    dist.init_process_group(
+    use_ddp = n_gpus > 1
-        backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
+    if use_ddp:
-        init_method="env://?use_libuv=False",
+        dist.init_process_group(
-        world_size=n_gpus,
+            backend="gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
-        rank=rank,
+            init_method="env://?use_libuv=False",
-    )
+            world_size=n_gpus,
            rank=rank,
        )
    torch.manual_seed(hps.train.seed)
    if torch.cuda.is_available():
        torch.cuda.set_device(rank)
@ -118,15 +124,20 @@ def run(rank, n_gpus, hps):
        shuffle=True,
    )
    collate_fn = TextAudioSpeakerCollate()
-    train_loader = DataLoader(
+    worker_count = 0 if os.name == "nt" and n_gpus <= 1 else min(2 if os.name == "nt" else 5, os.cpu_count() or 1)
-        train_dataset,
+    loader_kwargs = dict(
-        num_workers=5,
+        num_workers=worker_count,
        shuffle=False,
-        pin_memory=True,
+        pin_memory=torch.cuda.is_available(),
        collate_fn=collate_fn,
        batch_sampler=train_sampler,
-        persistent_workers=True,
+    )
-        prefetch_factor=3,
+    if worker_count > 0:
        loader_kwargs["persistent_workers"] = True
        loader_kwargs["prefetch_factor"] = 2 if os.name == "nt" else 3
    train_loader = DataLoader(
        train_dataset,
        **loader_kwargs,
    )
    save_root = "%s/logs_s2_%s_lora_%s" % (hps.data.exp_dir, hps.model.version, hps.train.lora_rank)
    os.makedirs(save_root, exist_ok=True)
@ -156,7 +167,9 @@ def run(rank, n_gpus, hps):
    def model2cuda(net_g, rank):
        if torch.cuda.is_available():
-            net_g = DDP(net_g.cuda(rank), device_ids=[rank], find_unused_parameters=True)
+            net_g = net_g.cuda(rank)
            if use_ddp:
                net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
        else:
            net_g = net_g.to(device)
        return net_g
@ -242,6 +255,8 @@ def run(rank, n_gpus, hps):
                None,
            )
        scheduler_g.step()
    if use_ddp and dist.is_initialized():
        dist.destroy_process_group()
    print("training done")
--- a/GPT_SoVITS/text/chinese2.py
+++ b/GPT_SoVITS/text/chinese2.py
@ -180,10 +180,15 @@ def _merge_erhua(initials: list[str], finals: list[str], word: str, pos: str) ->
 def _g2p(segments):
    phones_list = []
    word2ph = []
-    for seg in segments:
+    g2pw_batch_results = []
    g2pw_batch_cursor = 0
    processed_segments = [re.sub("[a-zA-Z]+", "", seg) for seg in segments]
    if is_g2pw:
        batch_inputs = [seg for seg in processed_segments if seg]
        g2pw_batch_results = g2pw._g2pw(batch_inputs) if batch_inputs else []
    for seg in processed_segments:
        pinyins = []
        # Replace all English words in the sentence
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
        initials = []
@ -204,8 +209,10 @@ def _g2p(segments):
            finals = sum(finals, [])
            print("pypinyin结果", initials, finals)
        else:
-            # g2pw采用整句推理
+            # g2pw采用整句推理（批量推理，逐句取结果）
-            pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)
+            if seg:
                pinyins = g2pw_batch_results[g2pw_batch_cursor]
                g2pw_batch_cursor += 1
            pre_word_length = 0
            for word, pos in seg_cut:
--- a/GPT_SoVITS/text/g2pw/dataset.py
+++ b/GPT_SoVITS/text/g2pw/dataset.py
@ -18,6 +18,7 @@ Credits
 from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Tuple
 import numpy as np
@ -37,6 +38,8 @@ def prepare_onnx_input(
    use_mask: bool = False,
    window_size: int = None,
    max_len: int = 512,
    char2id: Optional[Dict[str, int]] = None,
    char_phoneme_masks: Optional[Dict[str, List[int]]] = None,
 ) -> Dict[str, np.array]:
    if window_size is not None:
        truncated_texts, truncated_query_ids = _truncate_texts(
@ -48,33 +51,88 @@ def prepare_onnx_input(
    phoneme_masks = []
    char_ids = []
    position_ids = []
    tokenized_cache = {}
    if char2id is None:
        char2id = {char: idx for idx, char in enumerate(chars)}
    if use_mask:
        if char_phoneme_masks is None:
            char_phoneme_masks = {
                char: [1 if i in char2phonemes[char] else 0 for i in range(len(labels))]
                for char in char2phonemes
            }
    else:
        full_phoneme_mask = [1] * len(labels)
    for idx in range(len(texts)):
        text = (truncated_texts if window_size else texts)[idx].lower()
        query_id = (truncated_query_ids if window_size else query_ids)[idx]
-        try:
+        cached = tokenized_cache.get(text)
-            tokens, text2token, token2text = tokenize_and_map(tokenizer=tokenizer, text=text)
+        if cached is None:
-        except Exception:
+            try:
-            print(f'warning: text "{text}" is invalid')
+                tokens, text2token, token2text = tokenize_and_map(tokenizer=tokenizer, text=text)
-            return {}
+            except Exception:
                print(f'warning: text "{text}" is invalid')
                return {}
-        text, query_id, tokens, text2token, token2text = _truncate(
+            if len(tokens) <= max_len - 2:
-            max_len=max_len, text=text, query_id=query_id, tokens=tokens, text2token=text2token, token2text=token2text
+                processed_tokens = ["[CLS]"] + tokens + ["[SEP]"]
-        )
+                shared_input_id = list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens)))
                shared_token_type_id = list(np.zeros((len(processed_tokens),), dtype=int))
                shared_attention_mask = list(np.ones((len(processed_tokens),), dtype=int))
                cached = {
                    "is_short": True,
                    "tokens": tokens,
                    "text2token": text2token,
                    "token2text": token2text,
                    "input_id": shared_input_id,
                    "token_type_id": shared_token_type_id,
                    "attention_mask": shared_attention_mask,
                }
            else:
                cached = {
                    "is_short": False,
                    "tokens": tokens,
                    "text2token": text2token,
                    "token2text": token2text,
                }
            tokenized_cache[text] = cached
-        processed_tokens = ["[CLS]"] + tokens + ["[SEP]"]
+        if cached["is_short"]:
            text_for_query = text
            query_id_for_query = query_id
            text2token_for_query = cached["text2token"]
            input_id = cached["input_id"]
            token_type_id = cached["token_type_id"]
            attention_mask = cached["attention_mask"]
        else:
            (
                text_for_query,
                query_id_for_query,
                tokens_for_query,
                text2token_for_query,
                _token2text_for_query,
            ) = _truncate(
                max_len=max_len,
                text=text,
                query_id=query_id,
                tokens=cached["tokens"],
                text2token=cached["text2token"],
                token2text=cached["token2text"],
            )
            processed_tokens = ["[CLS]"] + tokens_for_query + ["[SEP]"]
            input_id = list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens)))
            token_type_id = list(np.zeros((len(processed_tokens),), dtype=int))
            attention_mask = list(np.ones((len(processed_tokens),), dtype=int))
-        input_id = list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens)))
+        query_char = text_for_query[query_id_for_query]
-        token_type_id = list(np.zeros((len(processed_tokens),), dtype=int))
+        if use_mask:
-        attention_mask = list(np.ones((len(processed_tokens),), dtype=int))
+            phoneme_mask = char_phoneme_masks[query_char]
-
+        else:
-        query_char = text[query_id]
+            phoneme_mask = full_phoneme_mask
-        phoneme_mask = (
+        char_id = char2id[query_char]
-            [1 if i in char2phonemes[query_char] else 0 for i in range(len(labels))] if use_mask else [1] * len(labels)
+        position_id = text2token_for_query[query_id_for_query] + 1  # [CLS] token locate at first place
        )
        char_id = chars.index(query_char)
        position_id = text2token[query_id] + 1  # [CLS] token locate at first place
        input_ids.append(input_id)
        token_type_ids.append(token_type_id)
@ -83,10 +141,15 @@ def prepare_onnx_input(
        char_ids.append(char_id)
        position_ids.append(position_id)
    max_token_length = max(len(seq) for seq in input_ids)
    def _pad_sequences(sequences, pad_value=0):
        return [seq + [pad_value] * (max_token_length - len(seq)) for seq in sequences]
    outputs = {
-        "input_ids": np.array(input_ids).astype(np.int64),
+        "input_ids": np.array(_pad_sequences(input_ids, pad_value=0)).astype(np.int64),
-        "token_type_ids": np.array(token_type_ids).astype(np.int64),
+        "token_type_ids": np.array(_pad_sequences(token_type_ids, pad_value=0)).astype(np.int64),
-        "attention_masks": np.array(attention_masks).astype(np.int64),
+        "attention_masks": np.array(_pad_sequences(attention_masks, pad_value=0)).astype(np.int64),
        "phoneme_masks": np.array(phoneme_masks).astype(np.float32),
        "char_ids": np.array(char_ids).astype(np.int64),
        "position_ids": np.array(position_ids).astype(np.int64),
--- a/GPT_SoVITS/text/g2pw/onnx_api.py
+++ b/GPT_SoVITS/text/g2pw/onnx_api.py
@ -10,7 +10,6 @@ from typing import Any, Dict, List, Tuple
 import numpy as np
 import onnxruntime
 import requests
 import torch
 from opencc import OpenCC
 from pypinyin import Style, pinyin
 from transformers.models.auto.tokenization_auto import AutoTokenizer
@ -22,9 +21,8 @@ from .utils import load_config
 onnxruntime.set_default_logger_severity(3)
 try:
    onnxruntime.preload_dlls()
-except:
+except Exception:
    pass
    # traceback.print_exc()
 warnings.filterwarnings("ignore")
 model_version = "1.1"
@ -55,6 +53,24 @@ def predict(session, onnx_input: Dict[str, Any], labels: List[str]) -> Tuple[Lis
    return all_preds, all_confidences
 def _load_json_from_candidates(filename: str, candidate_dirs: List[str]) -> Dict[str, Any]:
    """Load ``filename`` from the first candidate directory that contains it.

    Args:
        filename: Name of the JSON file to look for.
        candidate_dirs: Directories to probe, in priority order. Falsy
            entries (``None``, ``""``) are ignored.

    Returns:
        The parsed JSON content of the first match.

    Raises:
        FileNotFoundError: If no candidate directory contains ``filename``.
    """
    # filter(None, ...) drops the falsy placeholders before probing the disk.
    for directory in filter(None, candidate_dirs):
        path = os.path.join(directory, filename)
        if not os.path.exists(path):
            continue
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"Cannot locate {filename} in candidate dirs: {candidate_dirs}")
 def _find_first_existing_file(*paths: str) -> str:
    """Return the first of ``paths`` that is non-empty and exists on disk.

    Raises:
        FileNotFoundError: If none of the given paths exists.
    """
    # A matching path is always truthy, so None is a safe "no match" marker.
    match = next((candidate for candidate in paths if candidate and os.path.exists(candidate)), None)
    if match is None:
        raise FileNotFoundError(f"Files not found: {paths}")
    return match
 def download_and_decompress(model_dir: str = "G2PWModel/"):
    if not os.path.exists(model_dir):
        parent_directory = os.path.dirname(model_dir)
@ -62,7 +78,7 @@ def download_and_decompress(model_dir: str = "G2PWModel/"):
        extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
        extract_dir_new = os.path.join(parent_directory, "G2PWModel")
        print("Downloading g2pw model...")
-        modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"  # "https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
+        modelscope_url = "https://www.modelscope.cn/models/kamiorinn/g2pw/resolve/master/G2PWModel_1.1.zip"
        with requests.get(modelscope_url, stream=True) as r:
            r.raise_for_status()
            with open(zip_dir, "wb") as f:
@ -79,7 +95,7 @@ def download_and_decompress(model_dir: str = "G2PWModel/"):
    return model_dir
-class G2PWOnnxConverter:
+class _G2PWBaseOnnxConverter:
    def __init__(
        self,
        model_dir: str = "G2PWModel/",
@ -87,33 +103,16 @@ class G2PWOnnxConverter:
        model_source: str = None,
        enable_non_tradional_chinese: bool = False,
    ):
-        uncompress_path = download_and_decompress(model_dir)
+        self.model_dir = download_and_decompress(model_dir)
-
+        self.config = load_config(config_path=os.path.join(self.model_dir, "config.py"), use_default=True)
        sess_options = onnxruntime.SessionOptions()
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        sess_options.intra_op_num_threads = 2 if torch.cuda.is_available() else 0
        if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
            self.session_g2pW = onnxruntime.InferenceSession(
                os.path.join(uncompress_path, "g2pW.onnx"),
                sess_options=sess_options,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
            )
        else:
            self.session_g2pW = onnxruntime.InferenceSession(
                os.path.join(uncompress_path, "g2pW.onnx"),
                sess_options=sess_options,
                providers=["CPUExecutionProvider"],
            )
        self.config = load_config(config_path=os.path.join(uncompress_path, "config.py"), use_default=True)
        self.model_source = model_source if model_source else self.config.model_source
        self.enable_opencc = enable_non_tradional_chinese
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_source)
-        polyphonic_chars_path = os.path.join(uncompress_path, "POLYPHONIC_CHARS.txt")
+        polyphonic_chars_path = os.path.join(self.model_dir, "POLYPHONIC_CHARS.txt")
-        monophonic_chars_path = os.path.join(uncompress_path, "MONOPHONIC_CHARS.txt")
+        monophonic_chars_path = os.path.join(self.model_dir, "MONOPHONIC_CHARS.txt")
        self.polyphonic_chars = [
            line.split("\t") for line in open(polyphonic_chars_path, encoding="utf-8").read().strip().split("\n")
        ]
@ -149,31 +148,47 @@ class G2PWOnnxConverter:
        )
        self.chars = sorted(list(self.char2phonemes.keys()))
        self.char2id = {char: idx for idx, char in enumerate(self.chars)}
        self.char_phoneme_masks = (
            {
                char: [1 if i in self.char2phonemes[char] else 0 for i in range(len(self.labels))]
                for char in self.char2phonemes
            }
            if self.config.use_mask
            else None
        )
        self.polyphonic_chars_new = set(self.chars)
        for char in self.non_polyphonic:
-            if char in self.polyphonic_chars_new:
+            self.polyphonic_chars_new.discard(char)
                self.polyphonic_chars_new.remove(char)
        self.monophonic_chars_dict = {char: phoneme for char, phoneme in self.monophonic_chars}
        for char in self.non_monophonic:
-            if char in self.monophonic_chars_dict:
+            self.monophonic_chars_dict.pop(char, None)
                self.monophonic_chars_dict.pop(char)
-        self.pos_tags = ["UNK", "A", "C", "D", "I", "N", "P", "T", "V", "DE", "SHI"]
+        default_asset_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "G2PWModel"))
        candidate_asset_dirs = [self.model_dir, default_asset_dir]
        self.bopomofo_convert_dict = _load_json_from_candidates(
            "bopomofo_to_pinyin_wo_tune_dict.json", candidate_asset_dirs
        )
        self.char_bopomofo_dict = _load_json_from_candidates("char_bopomofo_dict.json", candidate_asset_dirs)
        with open(os.path.join(uncompress_path, "bopomofo_to_pinyin_wo_tune_dict.json"), "r", encoding="utf-8") as fr:
            self.bopomofo_convert_dict = json.load(fr)
        self.style_convert_func = {
            "bopomofo": lambda x: x,
            "pinyin": self._convert_bopomofo_to_pinyin,
        }[style]
        with open(os.path.join(uncompress_path, "char_bopomofo_dict.json"), "r", encoding="utf-8") as fr:
            self.char_bopomofo_dict = json.load(fr)
        if self.enable_opencc:
            self.cc = OpenCC("s2tw")
        self.enable_sentence_dedup = os.getenv("g2pw_sentence_dedup", "true").strip().lower() in {
            "1",
            "true",
            "yes",
            "y",
            "on",
        }
        # 聚焦到多音字附近上下文，默认左右各16字；设为0表示关闭裁剪（整句）。
        self.polyphonic_context_chars = max(0, int(os.getenv("g2pw_polyphonic_context_chars", "16")))
    def _convert_bopomofo_to_pinyin(self, bopomofo: str) -> str:
        tone = bopomofo[-1]
@ -181,9 +196,8 @@ class G2PWOnnxConverter:
        component = self.bopomofo_convert_dict.get(bopomofo[:-1])
        if component:
            return component + tone
-        else:
+        print(f'Warning: "{bopomofo}" cannot convert to pinyin')
-            print(f'Warning: "{bopomofo}" cannot convert to pinyin')
+        return None
            return None
    def __call__(self, sentences: List[str]) -> List[List[str]]:
        if isinstance(sentences, str):
@ -197,51 +211,147 @@ class G2PWOnnxConverter:
                translated_sentences.append(translated_sent)
            sentences = translated_sentences
-        texts, query_ids, sent_ids, partial_results = self._prepare_data(sentences=sentences)
+        texts, model_query_ids, result_query_ids, sent_ids, partial_results = self._prepare_data(sentences=sentences)
        if len(texts) == 0:
            # sentences no polyphonic words
            return partial_results
-        onnx_input = prepare_onnx_input(
+        model_input = prepare_onnx_input(
            tokenizer=self.tokenizer,
            labels=self.labels,
            char2phonemes=self.char2phonemes,
            chars=self.chars,
            texts=texts,
-            query_ids=query_ids,
+            query_ids=model_query_ids,
            use_mask=self.config.use_mask,
            window_size=None,
            char2id=self.char2id,
            char_phoneme_masks=self.char_phoneme_masks,
        )
-        preds, confidences = predict(session=self.session_g2pW, onnx_input=onnx_input, labels=self.labels)
+        if not model_input:
            return partial_results
        if self.enable_sentence_dedup:
            preds, _confidences = self._predict_with_sentence_dedup(model_input=model_input, texts=texts)
        else:
            preds, _confidences = self._predict(model_input=model_input)
        if self.config.use_char_phoneme:
            preds = [pred.split(" ")[1] for pred in preds]
        results = partial_results
-        for sent_id, query_id, pred in zip(sent_ids, query_ids, preds):
+        for sent_id, query_id, pred in zip(sent_ids, result_query_ids, preds):
            results[sent_id][query_id] = self.style_convert_func(pred)
        return results
-    def _prepare_data(self, sentences: List[str]) -> Tuple[List[str], List[int], List[int], List[List[str]]]:
+    def _prepare_data(
-        texts, query_ids, sent_ids, partial_results = [], [], [], []
+        self, sentences: List[str]
    ) -> Tuple[List[str], List[int], List[int], List[int], List[List[str]]]:
        texts, model_query_ids, result_query_ids, sent_ids, partial_results = [], [], [], [], []
        for sent_id, sent in enumerate(sentences):
            # pypinyin works well for Simplified Chinese than Traditional Chinese
            sent_s = tranditional_to_simplified(sent)
            pypinyin_result = pinyin(sent_s, neutral_tone_with_five=True, style=Style.TONE3)
            partial_result = [None] * len(sent)
            polyphonic_indices: List[int] = []
            for i, char in enumerate(sent):
                if char in self.polyphonic_chars_new:
-                    texts.append(sent)
+                    polyphonic_indices.append(i)
                    query_ids.append(i)
                    sent_ids.append(sent_id)
                elif char in self.monophonic_chars_dict:
                    partial_result[i] = self.style_convert_func(self.monophonic_chars_dict[char])
                elif char in self.char_bopomofo_dict:
                    partial_result[i] = pypinyin_result[i][0]
                    # partial_result[i] =  self.style_convert_func(self.char_bopomofo_dict[char][0])
                else:
                    partial_result[i] = pypinyin_result[i][0]
            if polyphonic_indices:
                if self.polyphonic_context_chars > 0:
                    left = max(0, polyphonic_indices[0] - self.polyphonic_context_chars)
                    right = min(len(sent), polyphonic_indices[-1] + self.polyphonic_context_chars + 1)
                    sent_for_predict = sent[left:right]
                    query_offset = left
                else:
                    sent_for_predict = sent
                    query_offset = 0
                for index in polyphonic_indices:
                    texts.append(sent_for_predict)
                    model_query_ids.append(index - query_offset)
                    result_query_ids.append(index)
                    sent_ids.append(sent_id)
            partial_results.append(partial_result)
-        return texts, query_ids, sent_ids, partial_results
+        return texts, model_query_ids, result_query_ids, sent_ids, partial_results
    def _predict(self, model_input: Dict[str, Any]) -> Tuple[List[str], List[float]]:
        """Backend hook that runs the model on a prepared input batch.

        This base implementation only raises; a subclass has to override it
        (``G2PWOnnxConverter`` does so by delegating to ``predict``).

        Args:
            model_input: Mapping of input names to batch values.

        Returns:
            A ``(predictions, confidences)`` pair, per the annotation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
    def _predict_with_sentence_dedup(
        self, model_input: Dict[str, Any], texts: List[str]
    ) -> Tuple[List[str], List[float]]:
        """Predict per query, sharing encoder inputs between rows of the same text.

        Rows are grouped by their source text. For a group with several rows,
        the sentence-level inputs (``input_ids``, ``token_type_ids``,
        ``attention_masks``) are reduced to a single row while the per-query
        inputs keep one row per query.
        NOTE(review): this relies on the model accepting a batch-1 sentence
        input next to batch-N query inputs - confirm against the exported graph.

        The reduction is only applied when the group's sentence-level rows are
        really identical. ``prepare_onnx_input`` truncates over-long texts
        around each query position, so two rows with the same text can carry
        different token windows; such groups are predicted with their full rows.

        Args:
            model_input: Mapping of input names to arrays whose first axis is
                the query axis (they are indexed with a list of row indices).
            texts: Source text of every row, aligned with ``model_input``.

        Returns:
            ``(preds, confidences)``, both aligned with ``texts``.
        """
        if len(texts) <= 1:
            return self._predict(model_input=model_input)

        grouped_indices: Dict[str, List[int]] = {}
        for idx, text in enumerate(texts):
            grouped_indices.setdefault(text, []).append(idx)

        # Nothing to share when every text is unique: run the batch as-is.
        if all(len(indices) == 1 for indices in grouped_indices.values()):
            return self._predict(model_input=model_input)

        shared_names = ("input_ids", "token_type_ids", "attention_masks")
        preds: List[str] = [""] * len(texts)
        confidences: List[float] = [0.0] * len(texts)
        for indices in grouped_indices.values():
            group_input = {name: value[indices] for name, value in model_input.items()}
            # Collapse to one sentence row only if all rows match row 0 exactly.
            can_share = len(indices) > 1 and all(
                bool((group_input[name] == group_input[name][:1]).all()) for name in shared_names
            )
            if can_share:
                for name in shared_names:
                    group_input[name] = group_input[name][:1]
            group_preds, group_confidences = self._predict(model_input=group_input)
            # Scatter the group's results back to their original row positions.
            for output_idx, pred, confidence in zip(indices, group_preds, group_confidences):
                preds[output_idx] = pred
                confidences[output_idx] = confidence
        return preds, confidences
 class G2PWOnnxConverter(_G2PWBaseOnnxConverter):
    """Converter whose predictions come from an ONNX Runtime inference session.

    The session is created from ``g2pW.onnx`` (or ``g2pw.onnx``) inside the
    model directory resolved by the base class. The CUDA execution provider
    is listed first when ONNX Runtime reports it as available; the CPU
    provider is always listed.
    """

    def __init__(
        self,
        model_dir: str = "G2PWModel/",
        style: str = "bopomofo",
        model_source: str = None,
        enable_non_tradional_chinese: bool = False,
    ):
        super().__init__(
            model_dir=model_dir,
            style=style,
            model_source=model_source,
            enable_non_tradional_chinese=enable_non_tradional_chinese,
        )

        options = onnxruntime.SessionOptions()
        options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        options.intra_op_num_threads = 2

        # Accept either capitalisation of the model file name.
        model_file = _find_first_existing_file(
            os.path.join(self.model_dir, "g2pW.onnx"),
            os.path.join(self.model_dir, "g2pw.onnx"),
        )

        # CPU is always present; CUDA goes in front of it when available.
        providers = ["CPUExecutionProvider"]
        if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
            providers.insert(0, "CUDAExecutionProvider")
        self.session_g2pw = onnxruntime.InferenceSession(
            model_file,
            sess_options=options,
            providers=providers,
        )

    def _predict(self, model_input: Dict[str, Any]) -> Tuple[List[str], List[float]]:
        """Run ``predict`` on this converter's session with its label set."""
        return predict(session=self.session_g2pw, onnx_input=model_input, labels=self.labels)
--- a/README.md
+++ b/README.md
@ -48,6 +48,8 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-
 请不要尬黑GPT-SoVITS推理速度慢，谢谢！
 CPU-Optimized Inference Version：https://github.com/baicai-1145/GPT-SoVITS-CPUFast
 **User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
 ## Installation
--- a/api_role.py
+++ b/api_role.py
@ -0,0 +1,825 @@
 """
 GPT-SoVITS API 实现
 ### 完整请求示例 (/ttsrole POST)
 {
    "text": "你好",                     # str, 必填, 要合成的文本内容
    "role": "role1",                   # str, 必填, 角色名称，决定使用 roles/{role} 中的配置和音频
    "emotion": "开心",                  # str, 可选, 情感标签，用于从 roles/{role}/reference_audios 中选择音频
    "text_lang": "auto",               # str, 可选, 默认 "auto", 文本语言，"auto" 时根据 emotion 或角色目录动态选择
    "ref_audio_path": "/path/to/ref.wav",  # str, 可选, 参考音频路径，若提供则优先使用，跳过自动选择
    "aux_ref_audio_paths": ["/path1.wav", "/path2.wav"],  # List[str], 可选, 辅助参考音频路径，用于多说话人融合
    "prompt_lang": "ja",               # str, 可选, 提示文本语言，若提供 ref_audio_path 则需指定，"auto" 模式下动态选择
    "prompt_text": "こんにちは",       # str, 可选, 提示文本，与 ref_audio_path 配对使用，自动选择时从文件或文件名生成
    "top_k": 10,                       # int, 可选, Top-K 采样值，覆盖 inference.top_k
    "top_p": 0.8,                      # float, 可选, Top-P 采样值，覆盖 inference.top_p
    "temperature": 1.0,                # float, 可选, 温度值，覆盖 inference.temperature
    "text_split_method": "cut5",       # str, 可选, 文本分割方法，覆盖 inference.text_split_method, 具体见text_segmentation_method.py
    "batch_size": 2,                   # int, 可选, 批处理大小，覆盖 inference.batch_size
    "batch_threshold": 0.75,           # float, 可选, 批处理阈值，覆盖 inference.batch_threshold
    "split_bucket": true,              # bool, 可选, 是否按桶分割，覆盖 inference.split_bucket
    "speed_factor": 1.2,               # float, 可选, 语速因子，覆盖 inference.speed_factor
    "fragment_interval": 0.3,          # float, 可选, 片段间隔（秒），覆盖 inference.fragment_interval
    "seed": 42,                        # int, 可选, 随机种子，覆盖 seed
    "media_type": "wav",               # str, 可选, 默认 "wav", 输出格式，支持 "wav", "raw", "ogg", "aac"
    "streaming_mode": false,           # bool, 可选, 默认 false, 是否流式返回
    "parallel_infer": true,            # bool, 可选, 默认 true, 是否并行推理
    "repetition_penalty": 1.35,        # float, 可选, 重复惩罚值，覆盖 inference.repetition_penalty
    "version": "v2",                   # str, 可选, 配置文件版本，覆盖 version
    "languages": ["zh", "ja", "en"],   # List[str], 可选, 支持的语言列表，覆盖 languages
    "bert_base_path": "/path/to/bert", # str, 可选, BERT 模型路径，覆盖 bert_base_path
    "cnhuhbert_base_path": "/path/to/hubert",  # str, 可选, HuBERT 模型路径，覆盖 cnhuhbert_base_path
    "device": "cpu",                   # str, 可选, 统一设备，覆盖 device
    "is_half": true,                   # bool, 可选, 是否使用半精度，覆盖 is_half
    "t2s_weights_path": "/path/to/gpt.ckpt",  # str, 可选, GPT 模型路径，覆盖 t2s_weights_path
    "vits_weights_path": "/path/to/sovits.pth",  # str, 可选, SoVITS 模型路径，覆盖 vits_weights_path
    "t2s_model_path": "/path/to/gpt.ckpt",  # str, 可选, GPT 模型路径（与 t2s_weights_path 同义）
    "t2s_model_device": "cpu",         # str, 可选, GPT 模型设备，覆盖 t2s_model.device，默认检测显卡
    "vits_model_path": "/path/to/sovits.pth",  # str, 可选, SoVITS 模型路径（与 vits_weights_path 同义）
    "vits_model_device": "cpu"         # str, 可选, SoVITS 模型设备，覆盖 vits_model.device，默认检测显卡
 }
 ### 参数必要性和优先级
 - 必填参数:
  - /ttsrole: text, role
  - /tts: text, ref_audio_path, prompt_lang
 - 可选参数: 其他均为可选，默认值从 roles/{role}/tts_infer.yaml 或 GPT_SoVITS/configs/tts_infer.yaml 获取
 - 优先级: POST 请求参数 > roles/{role}/tts_infer.yaml > 默认 GPT_SoVITS/configs/tts_infer.yaml
 ### 目录结构
 GPT-SoVITS-roleapi/
 ├── api_role.py                    # 本文件, API 主程序
 ├── GPT_SoVITS/                    # GPT-SoVITS 核心库
 │   └── configs/
 │       └── tts_infer.yaml         # 默认配置文件
 ├── roles/                         # 角色配置目录
 │   ├── role1/                     # 示例角色 role1
 │   │   ├── tts_infer.yaml         # 角色配置文件（可选）
 │   │   ├── model.ckpt             # GPT 模型（可选）
 │   │   ├── model.pth              # SoVITS 模型（可选）
 │   │   └── reference_audios/      # 角色参考音频目录
 │   │       ├── zh/
 │   │       │   ├── 【开心】voice1.wav
 │   │       │   ├── 【开心】voice1.txt
 │   │       ├── ja/
 │   │       │   ├── 【开心】voice2.wav
 │   │       │   ├── 【开心】voice2.txt
 │   ├── role2/
 │   │   ├── tts_infer.yaml
 │   │   ├── model.ckpt
 │   │   ├── model.pth
 │   │   └── reference_audios/
 │   │       ├── zh/
 │   │       │   ├── 【开心】voice1.wav
 │   │       │   ├── 【开心】voice1.txt
 │   │       │   ├── 【悲伤】asdafasdas.wav
 │   │       │   ├── 【悲伤】asdafasdas.txt
 │   │       ├── ja/
 │   │       │   ├── 【开心】voice2.wav
 │   │       │   ├── 【开心】voice2.txt
 ### text_lang, prompt_lang, prompt_text 选择逻辑 (/ttsrole)
 1. text_lang 选择逻辑:
   - 默认值: "auto"
   - 如果请求未提供 text_lang，视为 "auto"
   - 当 text_lang = "auto" 且存在 emotion 参数：
     - 从 roles/{role}/reference_audios 下所有语言文件夹中查找以 "【emotion】" 开头的音频
     - 随机选择一个匹配的音频，语言由音频所在文件夹确定
   - 当 text_lang 指定具体语言（如 "zh"）：
     - 从 roles/{role}/reference_audios/{text_lang} 中选择音频
     - 如果指定语言无匹配音频，则尝试其他语言文件夹
 2. prompt_lang 选择逻辑:
   - 如果提供了 ref_audio_path，则需显式指定 prompt_lang
   - 如果未提供 ref_audio_path 且 text_lang = "auto" 且存在 emotion：
     - prompt_lang = 随机选择的音频所在语言文件夹名（如 "zh" 或 "ja"）
   - 如果未提供 ref_audio_path 且 text_lang 指定具体语言：
     - prompt_lang = text_lang（如 "zh"）
     - 如果 text_lang 无匹配音频，则为随机选择的音频所在语言
 3. prompt_text 选择逻辑:
   - 如果提供了 ref_audio_path（如 "/path/to/ref.wav"）：
     - 检查文件名是否包含 "【xxx】" 前缀：
       - 如果有（如 "【开心】abc.wav"）：
         - 若存在对应 .txt 文件（如 "【开心】abc.txt"），prompt_text = .txt 文件内容
         - 若无对应 .txt 文件，prompt_text = "abc"（去掉 "【开心】" 和 ".wav" 的部分）
       - 如果无 "【xxx】" 前缀：
         - 若存在对应 .txt 文件（如 "ref.txt"），prompt_text = .txt 文件内容
         - 若无对应 .txt 文件，prompt_text = "ref"（去掉 ".wav" 的部分）
   - 如果未提供 ref_audio_path：
     - 从 roles/{role}/reference_audios 中选择音频（基于 text_lang 和 emotion）：
       - 优先匹配 "【emotion】" 前缀的音频（如 "【开心】voice1.wav"）
       - 若存在对应 .txt 文件（如 "【开心】voice1.txt"），prompt_text = .txt 文件内容
       - 若无对应 .txt 文件，prompt_text = "voice1"（去掉 "【开心】" 和 ".wav" 的部分）
       - 未匹配 emotion 则随机选择一个音频，逻辑同上
 ### 讲解
 1. 必填参数:
   - /ttsrole: text, role
   - /tts: text, ref_audio_path, prompt_lang
 2. 音频选择 (/ttsrole):
   - 若提供 ref_audio_path，则使用它
   - 否则根据 role、text_lang、emotion 从 roles/{role}/reference_audios 中选择
   - text_lang = "auto" 时，若有 emotion，则跨语言匹配 "【emotion】" 前缀音频
   - emotion 匹配 "【emotion】" 前缀音频，未匹配则随机选择
 3. 设备选择:
   - 默认尝试检测显卡（torch.cuda.is_available()），若可用则用 "cuda"，否则 "cpu"
   - 若缺少 torch 依赖或检测失败，回退到 "cpu"
   - POST 参数 device, t2s_model_device, vits_model_device 可强制指定设备，优先级最高
 4. 配置文件:
   - 默认加载 GPT_SoVITS/configs/tts_infer.yaml
   - 若 roles/{role}/tts_infer.yaml 存在且未被请求参数覆盖，则使用它 (/ttsrole)
   - 请求参数（如 top_k, bert_base_path）覆盖所有配置文件
 5. 返回格式:
   - 成功时返回音频流 (Response 或 StreamingResponse)
   - 失败时返回 JSON，包含错误消息和可能的异常详情
 6. 运行:
   - python api_role.py -a 127.0.0.1 -p 9880
   - 检查启动日志确认设备
 ### 调用示例 (/ttsrole)
 ## 非流式调用，会一次性返回完整的音频数据，适用于需要完整音频文件的场景
 import requests
 url = "http://127.0.0.1:9880/ttsrole"
 payload = {
    "text": "你好，这是一个测试",  # 要合成的文本
    "role": "role1",               # 角色名称，必填
    "emotion": "开心",              # 情感标签，可选
    "text_lang": "zh",             # 文本语言，可选，默认为 "auto"
    "media_type": "wav"            # 输出音频格式，默认 "wav"
 }
 response = requests.post(url, json=payload)
 if response.status_code == 200:
    with open("output_non_stream.wav", "wb") as f:
        f.write(response.content)
    print("非流式音频已生成并保存为 output_non_stream.wav")
 else:
    print(f"请求失败: {response.json()}")
 ## 流式调用，会分块返回音频数据，适用于实时播放或处理大文件的场景
 import requests
 url = "http://127.0.0.1:9880/ttsrole"
 payload = {
    "text": "你好，这是一个测试",  # 要合成的文本
    "role": "role1",               # 角色名称，必填
    "emotion": "开心",              # 情感标签，可选
    "text_lang": "zh",             # 文本语言，可选，默认为 "auto"
    "media_type": "wav",           # 输出音频格式，默认 "wav"
    "streaming_mode": True         # 启用流式模式
 }
 with requests.post(url, json=payload, stream=True) as response:
    if response.status_code == 200:
        with open("output_stream.wav", "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # 确保 chunk 不为空
                    f.write(chunk)
        print("流式音频已生成并保存为 output_stream.wav")
    else:
        print(f"请求失败: {response.json()}")
 """
 import os
 import sys
 import traceback
 from typing import Generator, Optional, List, Dict
 import random
 import glob
 from concurrent.futures import ThreadPoolExecutor
 import asyncio
 now_dir = os.getcwd()
 sys.path.append(now_dir)
 sys.path.append("%s/GPT_SoVITS" % (now_dir))
 import argparse
 import subprocess
 import wave
 import signal
 import numpy as np
 import soundfile as sf
 from fastapi import FastAPI, HTTPException, Response
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
 import uvicorn
 from io import BytesIO
 from tools.i18n.i18n import I18nAuto
 from GPT_SoVITS.TTS_infer_pack.TTS import TTS, TTS_Config
 from GPT_SoVITS.TTS_infer_pack.text_segmentation_method import get_method_names as get_cut_method_names
 # Try to import PyTorch and detect whether a CUDA device is usable.
 # NOTE(review): when the import fails the name `torch` is never bound, yet
 # set_pipeline_device() below uses `torch` unconditionally - confirm that path
 # cannot be reached without PyTorch installed.
 try:
    import torch
    cuda_available = torch.cuda.is_available()
 except ImportError:
    cuda_available = False
    print("缺少 PyTorch 依赖，默认使用 CPU")
 except Exception as e:
    cuda_available = False
    print(f"检测显卡时出错: {str(e)}，默认使用 CPU")
 i18n = I18nAuto()
 # Names of the available text-splitting methods; check_params() validates
 # text_split_method against this collection.
 cut_method_names = get_cut_method_names()
 # Command-line options: config file path, bind address and port.
 parser = argparse.ArgumentParser(description="GPT-SoVITS api")
 parser.add_argument("-c", "--tts_config", type=str, default="GPT_SoVITS/configs/tts_infer.yaml", help="tts_infer路径")
 parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1")
 # The port default is written as a string; argparse passes string defaults
 # through `type`, so args.port is still an int.
 parser.add_argument("-p", "--port", type=int, default="9880", help="default: 9880")
 args = parser.parse_args()
 config_path = args.tts_config
 port = args.port
 host = args.bind_addr
 # Kept so handle_control("restart") can re-exec with the same arguments.
 argv = sys.argv
 # Fall back to the stock config when an empty path was supplied.
 if config_path in [None, ""]:
    config_path = "GPT_SoVITS/configs/tts_infer.yaml"
 default_device = "cuda" if cuda_available else "cpu"
 print(f"默认设备设置为: {default_device}")
 # Initialise the TTS configuration
 tts_config = TTS_Config(config_path)
 print(f"TTS_Config contents: {tts_config.__dict__}")
 # The detected default device replaces whatever device the config carried.
 if hasattr(tts_config, 'device'):
    tts_config.device = default_device
 tts_pipeline = TTS(tts_config)
 # Thread pool (a single worker) used to run TTS tasks off the event loop
 executor = ThreadPoolExecutor(max_workers=1)
 APP = FastAPI()
 class TTS_Request(BaseModel):
    # Request body of POST /tts.
    # Only text, ref_audio_path and prompt_lang have no default, so they are the
    # fields a client must always send.
    text: str  # text to synthesise
    ref_audio_path: str  # path of the reference audio
    prompt_lang: str  # language of the prompt text
    text_lang: str = "auto"  # language of `text`
    aux_ref_audio_paths: Optional[List[str]] = None  # auxiliary reference audio paths
    prompt_text: Optional[str] = ""  # transcript paired with the reference audio
    # Sampling controls.
    top_k: Optional[int] = 5
    top_p: Optional[float] = 1
    temperature: Optional[float] = 1
    # Must be one of cut_method_names (enforced by check_params).
    text_split_method: Optional[str] = "cut5"
    batch_size: Optional[int] = 1
    batch_threshold: Optional[float] = 0.75
    split_bucket: Optional[bool] = True
    speed_factor: Optional[float] = 1.0
    fragment_interval: Optional[float] = 0.3
    seed: Optional[int] = -1
    # check_params accepts "wav", "raw", "ogg" and "aac"; "ogg" only with streaming_mode.
    media_type: Optional[str] = "wav"
    streaming_mode: Optional[bool] = False
    parallel_infer: Optional[bool] = True
    repetition_penalty: Optional[float] = 1.35
    # When set, tts_handle moves the shared pipeline to this device.
    device: Optional[str] = None
 class TTSRole_Request(BaseModel):
    # Request body of POST /ttsrole.
    # Only text and role have no default; everything else is optional.
    # NOTE(review): the module docstring lists a "languages" request field and
    # check_params() reads req.get("languages"), but no such field is declared
    # here - confirm whether it should be added.
    text: str  # text to synthesise
    role: str  # role name; resolved to the roles/{role} directory
    text_lang: Optional[str] = "auto"  # language of `text`
    # Explicit reference audio; when absent tts_handle picks one via select_ref_audio.
    ref_audio_path: Optional[str] = None
    aux_ref_audio_paths: Optional[List[str]] = None  # auxiliary reference audio paths
    prompt_lang: Optional[str] = None  # language of the prompt text
    prompt_text: Optional[str] = None  # transcript paired with the reference audio
    # Emotion tag matched against the "【emotion】" file-name prefix.
    emotion: Optional[str] = None
    # Sampling controls.
    top_k: Optional[int] = 5
    top_p: Optional[float] = 1
    temperature: Optional[float] = 1
    # Must be one of cut_method_names (enforced by check_params).
    text_split_method: Optional[str] = "cut5"
    batch_size: Optional[int] = 1
    batch_threshold: Optional[float] = 0.75
    split_bucket: Optional[bool] = True
    speed_factor: Optional[float] = 1.0
    fragment_interval: Optional[float] = 0.3
    seed: Optional[int] = -1
    # check_params accepts "wav", "raw", "ogg" and "aac"; "ogg" only with streaming_mode.
    media_type: Optional[str] = "wav"
    streaming_mode: Optional[bool] = False
    parallel_infer: Optional[bool] = True
    repetition_penalty: Optional[float] = 1.35
    # Configuration overrides; tts_handle copies the non-None ones onto tts_config.
    bert_base_path: Optional[str] = None
    cnhuhbert_base_path: Optional[str] = None
    device: Optional[str] = None
    is_half: Optional[bool] = None
    t2s_weights_path: Optional[str] = None
    version: Optional[str] = None
    vits_weights_path: Optional[str] = None
    # When set, tts_handle loads these weights into the shared pipeline.
    t2s_model_path: Optional[str] = None
    vits_model_path: Optional[str] = None
    # Overwritten with `device` by tts_handle whenever `device` is given.
    t2s_model_device: Optional[str] = None
    vits_model_device: Optional[str] = None
 def pack_ogg(io_buffer: BytesIO, data: np.ndarray, rate: int):
    """Write ``data`` as single-channel OGG into ``io_buffer`` and rewind it.

    Returns the same ``io_buffer`` object, positioned at offset 0.
    """
    ogg_options = {"mode": 'w', "samplerate": rate, "channels": 1, "format": 'ogg'}
    with sf.SoundFile(io_buffer, **ogg_options) as ogg_writer:
        ogg_writer.write(data)
    io_buffer.seek(0)
    return io_buffer
 def pack_raw(io_buffer: BytesIO, data: np.ndarray, rate: int):
    """Append the raw bytes of ``data`` to ``io_buffer`` and rewind it.

    ``rate`` is accepted only so the signature matches the other packers; it
    is not used.  Returns the same ``io_buffer`` object, positioned at offset 0.
    """
    payload = data.tobytes()
    io_buffer.write(payload)
    io_buffer.seek(0)
    return io_buffer
 def pack_wav(io_buffer: BytesIO, data: np.ndarray, rate: int):
    """Write ``data`` as a WAV file into ``io_buffer`` and rewind it.

    Returns the same ``io_buffer`` object, positioned at offset 0.
    """
    sf.write(file=io_buffer, data=data, samplerate=rate, format='wav')
    io_buffer.seek(0)
    return io_buffer
 def pack_aac(io_buffer: BytesIO, data: np.ndarray, rate: int):
    """Encode ``data`` to AAC (ADTS) with an external ``ffmpeg`` process.

    The raw bytes of ``data`` are piped to ffmpeg's stdin and whatever ffmpeg
    writes to stdout is appended to ``io_buffer``, which is then rewound and
    returned.  ffmpeg's exit status and stderr are not inspected.
    """
    # ffmpeg is told the input is mono signed 16-bit little-endian PCM
    # (assumes `data` really holds int16 samples - TODO confirm with callers).
    ffmpeg_cmd = [
        'ffmpeg', '-f', 's16le', '-ar', str(rate), '-ac', '1', '-i', 'pipe:0',
        '-c:a', 'aac', '-b:a', '192k', '-vn', '-f', 'adts', 'pipe:1'
    ]
    completed = subprocess.run(
        ffmpeg_cmd,
        input=data.tobytes(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    io_buffer.write(completed.stdout)
    io_buffer.seek(0)
    return io_buffer
 def pack_audio(data: np.ndarray, rate: int, media_type: str) -> BytesIO:
    """Pack ``data`` into a fresh ``BytesIO`` in the container named by ``media_type``.

    "ogg", "aac" and "wav" use their dedicated packers; any other value is
    written as raw sample bytes.
    """
    packers = {"ogg": pack_ogg, "aac": pack_aac, "wav": pack_wav}
    packer = packers.get(media_type, pack_raw)
    return packer(BytesIO(), data, rate)
 def wave_header_chunk(frame_input=b"", channels=1, sample_width=2, sample_rate=32000):
    """Return the bytes of a WAV file holding ``frame_input``.

    With the default empty ``frame_input`` the result is just the WAV header,
    which the streaming endpoint sends ahead of raw audio fragments.
    """
    header_buf = BytesIO()
    writer = wave.open(header_buf, "wb")
    with writer:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(sample_rate)
        writer.writeframes(frame_input)
    return header_buf.getvalue()
 def handle_control(command: str):
    """Execute a server control command.

    "restart" replaces the current process with a fresh interpreter running
    the saved ``argv``; "exit" sends SIGTERM to this process and then exits.
    Any other command is ignored.
    """
    if command == "exit":
        os.kill(os.getpid(), signal.SIGTERM)
        exit(0)
    if command == "restart":
        os.execl(sys.executable, sys.executable, *argv)
 def check_params(req: dict, is_ttsrole: bool = False):
    """Validate request parameters.

    Returns ``None`` when the request is acceptable, otherwise a dict of the
    form ``{"status": "error", "message": ...}`` describing the first problem
    found.  /ttsrole requests need ``text`` and ``role``; other requests need
    ``text``, ``ref_audio_path`` and ``prompt_lang``.
    """
    def fail(message):
        return {"status": "error", "message": message}

    if not req.get("text"):
        return fail("text is required")

    prompt_lang = req.get("prompt_lang")
    if is_ttsrole:
        if not req.get("role"):
            return fail("role is required for /ttsrole")
    else:
        if not req.get("ref_audio_path"):
            return fail("ref_audio_path is required")
        if not prompt_lang:
            return fail("prompt_lang is required")

    # A request-supplied language list wins over the configured one.
    languages = req.get("languages") or tts_config.languages
    text_lang = req.get("text_lang", "auto")
    if text_lang != "auto" and text_lang.lower() not in languages:
        return fail(f"text_lang: {text_lang} is not supported")
    if prompt_lang and prompt_lang.lower() not in languages:
        return fail(f"prompt_lang: {prompt_lang} is not supported")

    media_type = req.get("media_type", "wav")
    if media_type not in ["wav", "raw", "ogg", "aac"]:
        return fail(f"media_type: {media_type} is not supported")
    if media_type == "ogg" and not req.get("streaming_mode", False):
        return fail("ogg format is not supported in non-streaming mode")

    text_split_method = req.get("text_split_method", "cut5")
    if text_split_method not in cut_method_names:
        return fail(f"text_split_method: {text_split_method} is not supported")
    return None
 def load_role_config(role: str, req: dict):
    """Apply the configuration and weights stored under ``roles/{role}``.

    Returns ``False`` when the role directory does not exist, ``True``
    otherwise.  When the request carries none of the configuration override
    keys and the role has a ``tts_infer.yaml``, the module-level ``tts_config``
    and ``tts_pipeline`` are rebuilt from it.  The first ``*.ckpt`` / ``*.pth``
    found in the role directory is loaded unless the request names its own
    GPT / SoVITS weights.
    """
    global tts_config, tts_pipeline

    role_dir = os.path.join(now_dir, "roles", role)
    if not os.path.exists(role_dir):
        return False

    override_keys = ["version", "bert_base_path", "cnhuhbert_base_path", "device", "is_half", "t2s_weights_path", "vits_weights_path"]
    request_overrides_config = any(req.get(key) for key in override_keys)
    if not request_overrides_config:
        role_yaml = os.path.join(role_dir, "tts_infer.yaml")
        if os.path.exists(role_yaml):
            tts_config = TTS_Config(role_yaml)
            if hasattr(tts_config, 'device'):
                tts_config.device = default_device
            tts_pipeline = TTS(tts_config)

    if not (req.get("t2s_weights_path") or req.get("t2s_model_path")):
        ckpt_files = glob.glob(os.path.join(role_dir, "*.ckpt"))
        if ckpt_files:
            tts_pipeline.init_t2s_weights(ckpt_files[0])

    if not (req.get("vits_weights_path") or req.get("vits_model_path")):
        pth_files = glob.glob(os.path.join(role_dir, "*.pth"))
        if pth_files:
            tts_pipeline.init_vits_weights(pth_files[0])

    return True
 def select_ref_audio(role: str, text_lang: str, emotion: str = None):
    """Pick a reference audio for ``role`` and derive its prompt text and language.

    Files are looked up under ``roles/{role}/reference_audios/{lang}/``.

    Selection order:
      1. ``text_lang == "auto"`` with an ``emotion``: a random file whose name
         starts with ``【emotion】`` from any language folder.
      2. Otherwise the folder named after ``text_lang``, then every other
         language folder.  Inside a folder, files tagged ``【...】`` are preferred,
         and among the candidates those containing ``【emotion】`` are preferred.

    The prompt text is the content of the ``.txt`` file that shares the audio's
    stem; without one it is the file name with the ``【...】`` tag and the
    extension removed.

    Returns:
        ``(audio_path, prompt_text, prompt_lang)``, or ``(None, None, None)``
        when no reference audio can be found.
    """
    audio_base_dir = os.path.join(now_dir, "roles", role, "reference_audios")
    if not os.path.exists(audio_base_dir):
        return None, None, None

    def list_audio(dir_path, pattern):
        # The "*.*" patterns also match the .txt transcripts stored next to each
        # audio file; drop them so a transcript is never returned as the audio.
        matches = glob.glob(os.path.join(dir_path, pattern))
        return [path for path in matches if not path.lower().endswith(".txt")]

    def read_prompt_text(audio_path):
        # Prefer the sibling transcript; otherwise fall back to the file name.
        txt_path = audio_path.rsplit(".", 1)[0] + ".txt"
        if os.path.exists(txt_path):
            with open(txt_path, "r", encoding="utf-8") as f:
                return f.read().strip()
        basename = os.path.basename(audio_path)
        start_idx = basename.find("】") + 1
        end_idx = basename.rfind(".")
        if start_idx > 0 and end_idx > start_idx:
            return basename[start_idx:end_idx]
        return basename[:end_idx] if end_idx > 0 else basename

    def find_audio_in_dir(dir_path):
        if not os.path.exists(dir_path):
            return None, None
        audio_files = list_audio(dir_path, "【*】*.*")
        if not audio_files:
            audio_files = list_audio(dir_path, "*.*")
        if not audio_files:
            return None, None
        candidates = audio_files
        if emotion:
            tagged = [f for f in audio_files if f"【{emotion}】" in os.path.basename(f)]
            # No file carries the requested emotion: fall back to any audio.
            candidates = tagged or audio_files
        audio_path = random.choice(candidates)
        return audio_path, read_prompt_text(audio_path)

    lang_key = text_lang.lower()
    all_langs = [d for d in os.listdir(audio_base_dir) if os.path.isdir(os.path.join(audio_base_dir, d))]

    if lang_key == "auto" and emotion:
        emotion_files = []
        for lang in all_langs:
            emotion_files.extend(list_audio(os.path.join(audio_base_dir, lang), f"【{emotion}】*.*"))
        if emotion_files:
            audio_path = random.choice(emotion_files)
            # The language is the name of the folder the audio lives in.
            prompt_lang = os.path.basename(os.path.dirname(audio_path))
            return audio_path, read_prompt_text(audio_path), prompt_lang

    audio_path, prompt_text = find_audio_in_dir(os.path.join(audio_base_dir, lang_key))
    if audio_path:
        return audio_path, prompt_text, lang_key
    for lang in all_langs:
        if lang != lang_key:
            audio_path, prompt_text = find_audio_in_dir(os.path.join(audio_base_dir, lang))
            if audio_path:
                return audio_path, prompt_text, lang
    return None, None, None
 def set_pipeline_device(pipeline: TTS, device: str):
    """Move the pipeline's models to ``device``, skipping the work when already there.

    A ``cuda*`` request is downgraded to ``"cpu"`` when CUDA is unavailable.
    The current device is read from the first parameter of ``t2s_model`` (or
    ``vits_model`` when there is no ``t2s_model``).
    NOTE(review): a bare ``"cuda"`` target probably never compares equal to a
    parameter's indexed device such as ``cuda:0``, in which case the early
    return is not taken for it - confirm.
    """
    cuda_ok = torch.cuda.is_available()
    if device.startswith("cuda") and not cuda_ok:
        print("警告: CUDA 不可用，强制使用 CPU")
        device = "cpu"
    target_device = torch.device(device)

    # Work out where the pipeline currently lives
    probe_model = getattr(pipeline, 't2s_model', None)
    if probe_model is None:
        probe_model = getattr(pipeline, 'vits_model', None)
    current_device = None
    if probe_model is not None:
        current_device = next(probe_model.parameters()).device
    if current_device == target_device:
        print(f"设备已是 {device}，无需切换")
        return

    # Record the new device in the pipeline configuration
    pipeline_configs = getattr(pipeline, 'configs', None)
    if hasattr(pipeline, 'configs') and hasattr(pipeline_configs, 'device'):
        pipeline_configs.device = device

    # Move the two main models first, then every other ``*_model`` attribute
    for model_name in ('t2s_model', 'vits_model'):
        model = getattr(pipeline, model_name, None)
        if model is not None:
            model.to(target_device)
    for attr_name in dir(pipeline):
        if not attr_name.endswith('_model'):
            continue
        component = getattr(pipeline, attr_name)
        if component is None:
            continue
        try:
            component.to(target_device)
            print(f"迁移 {attr_name} 到 {device}")
        except AttributeError:
            # Attribute is named like a model but has no ``.to``; leave it alone.
            pass

    # Release cached GPU memory after moving away from CUDA
    if torch.cuda.is_available() and not device.startswith("cuda"):
        torch.cuda.empty_cache()
    print(f"TTS 管道设备已设置为: {device}")
 def run_tts_pipeline(req):
    """Run the TTS task; meant to be submitted to the thread pool.

    Forwards ``req`` to the module-level ``tts_pipeline.run`` and returns its
    result unchanged.
    """
    # NOTE(review): tts_handle iterates the returned value (next() / for-loop).
    # If run() is a lazy generator, the synthesis happens wherever it is
    # consumed rather than inside this call - confirm.
    return tts_pipeline.run(req)
 async def tts_handle(req: dict, is_ttsrole: bool = False):
    """Validate a TTS request, prepare the shared pipeline and build the audio response.

    Args:
        req: Request parameters as a plain dict.  It is mutated in place
            (default ``text_lang``, device overrides, the selected reference
            audio / prompt, ``return_fragment``).
        is_ttsrole: True for /ttsrole requests, which additionally load the
            role's configuration and weights and resolve the reference audio
            and prompt text.

    Returns:
        A ``StreamingResponse`` in streaming mode, a ``Response`` holding the
        complete audio otherwise, or a ``JSONResponse`` with status 400 when
        validation or synthesis fails.
    """
    streaming_mode = req.get("streaming_mode", False)
    media_type = req.get("media_type", "wav")
    if "text_lang" not in req:
        req["text_lang"] = "auto"
    check_res = check_params(req, is_ttsrole)
    if check_res is not None:
        return JSONResponse(status_code=400, content=check_res)

    # A device given in the request overrides every device-related parameter
    # and moves the shared pipeline.
    device = req.get("device")
    if device is not None:
        req["t2s_model_device"] = device
        req["vits_model_device"] = device
        if hasattr(tts_config, 'device'):
            tts_config.device = device
        set_pipeline_device(tts_pipeline, device)

    if is_ttsrole:
        role_exists = load_role_config(req["role"], req)
        for key in ["bert_base_path", "cnhuhbert_base_path", "device", "is_half", "t2s_weights_path", "version", "vits_weights_path"]:
            if req.get(key) is not None:
                setattr(tts_config, key, req[key])
        if req.get("t2s_model_path"):
            tts_config.t2s_weights_path = req["t2s_model_path"]
            tts_pipeline.init_t2s_weights(req["t2s_model_path"])
        if req.get("vits_model_path"):
            tts_config.vits_weights_path = req["vits_model_path"]
            tts_pipeline.init_vits_weights(req["vits_model_path"])
        if not req.get("ref_audio_path"):
            # No explicit reference audio: pick one from the role directory.
            ref_audio_path, prompt_text, prompt_lang = select_ref_audio(req["role"], req["text_lang"], req.get("emotion"))
            if ref_audio_path:
                req["ref_audio_path"] = ref_audio_path
                req["prompt_text"] = prompt_text or ""
                req["prompt_lang"] = prompt_lang or req["text_lang"]
            elif not role_exists:
                return JSONResponse(status_code=400, content={"status": "error", "message": "Role directory not found and no suitable reference audio provided"})
        else:
            # Explicit reference audio: the prompt text comes from the sibling
            # .txt file, or else from the file name without its "【...】" tag
            # and extension.
            ref_audio_path = req["ref_audio_path"]
            txt_path = ref_audio_path.rsplit(".", 1)[0] + ".txt"
            if os.path.exists(txt_path):
                with open(txt_path, "r", encoding="utf-8") as f:
                    req["prompt_text"] = f.read().strip()
            else:
                basename = os.path.basename(ref_audio_path)
                end_idx = basename.rfind(".")
                has_tag = "【" in basename and "】" in basename
                start_idx = basename.find("】") + 1 if has_tag else 0
                if start_idx > 0 and end_idx > start_idx:
                    req["prompt_text"] = basename[start_idx:end_idx]
                else:
                    req["prompt_text"] = basename[:end_idx] if end_idx > 0 else basename

    if streaming_mode:
        req["return_fragment"] = True
    try:
        print(f"当前请求设备: {req.get('device')}")
        if hasattr(tts_pipeline, 't2s_model') and tts_pipeline.t2s_model is not None:
            print(f"t2s_model 设备: {next(tts_pipeline.t2s_model.parameters()).device}")
        if hasattr(tts_pipeline, 'vits_model') and tts_pipeline.vits_model is not None:
            print(f"vits_model 设备: {next(tts_pipeline.vits_model.parameters()).device}")
        # Start the TTS task through the executor
        loop = asyncio.get_event_loop()
        tts_generator = await loop.run_in_executor(executor, run_tts_pipeline, req)
        if streaming_mode:
            def streaming_generator():
                # For WAV the header goes out once, then fragments follow as raw
                # samples.  It is built from the first fragment's sample rate so
                # it matches the audio instead of the 32000 Hz default.
                header_pending = media_type == "wav"
                stream_type = "raw" if header_pending else media_type
                for sr, chunk in tts_generator:
                    if header_pending:
                        yield wave_header_chunk(sample_rate=sr)
                        header_pending = False
                    yield pack_audio(chunk, sr, stream_type).getvalue()
                if header_pending:
                    # Nothing was produced: still emit a (default) header.
                    yield wave_header_chunk()
            return StreamingResponse(streaming_generator(), media_type=f"audio/{media_type}")
        else:
            sr, audio_data = next(tts_generator)
            buf = pack_audio(audio_data, sr, media_type)
            return Response(buf.getvalue(), media_type=f"audio/{media_type}")
    except Exception as e:
        return JSONResponse(status_code=400, content={"status": "error", "message": "tts failed", "exception": str(e)})
@APP.get("/control")
 async def control(command: str = None):
    if command is None:
        return JSONResponse(status_code=400, content={"status": "error", "message": "command is required"})
    handle_control(command)
@APP.get("/tts")
 async def tts_get_endpoint(
    text: str,
    ref_audio_path: str,
    prompt_lang: str,
    text_lang: str = "auto",
    aux_ref_audio_paths: Optional[List[str]] = None,
    prompt_text: Optional[str] = "",
    top_k: Optional[int] = 5,
    top_p: Optional[float] = 1,
    temperature: Optional[float] = 1,
    text_split_method: Optional[str] = "cut0",
    batch_size: Optional[int] = 1,
    batch_threshold: Optional[float] = 0.75,
    split_bucket: Optional[bool] = True,
    speed_factor: Optional[float] = 1.0,
    fragment_interval: Optional[float] = 0.3,
    seed: Optional[int] = -1,
    media_type: Optional[str] = "wav",
    streaming_mode: Optional[bool] = False,
    parallel_infer: Optional[bool] = True,
    repetition_penalty: Optional[float] = 1.35,
    device: Optional[str] = None
 ):
    req = {
        "text": text,
        "text_lang": text_lang.lower(),
        "ref_audio_path": ref_audio_path,
        "aux_ref_audio_paths": aux_ref_audio_paths,
        "prompt_lang": prompt_lang.lower(),
        "prompt_text": prompt_text,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temperature,
        "text_split_method": text_split_method,
        "batch_size": batch_size,
        "batch_threshold": batch_threshold,
        "split_bucket": split_bucket,
        "speed_factor": speed_factor,
        "fragment_interval": fragment_interval,
        "seed": seed,
        "media_type": media_type,
        "streaming_mode": streaming_mode,
        "parallel_infer": parallel_infer,
        "repetition_penalty": repetition_penalty,
        "device": device
    }
    return await tts_handle(req)
@APP.post("/tts")
 async def tts_post_endpoint(request: TTS_Request):
    req = request.dict(exclude_unset=True)
    if "text_lang" in req:
        req["text_lang"] = req["text_lang"].lower()
    if "prompt_lang" in req:
        req["prompt_lang"] = req["prompt_lang"].lower()
    return await tts_handle(req)
@APP.get("/ttsrole")
 async def ttsrole_get_endpoint(
    text: str,
    role: str,
    text_lang: str = "auto",
    ref_audio_path: Optional[str] = None,
    aux_ref_audio_paths: Optional[List[str]] = None,
    prompt_lang: Optional[str] = None,
    prompt_text: Optional[str] = None,
    emotion: Optional[str] = None,
    top_k: Optional[int] = 5,
    top_p: Optional[float] = 1,
    temperature: Optional[float] = 1,
    text_split_method: Optional[str] = "cut5",
    batch_size: Optional[int] = 1,
    batch_threshold: Optional[float] = 0.75,
    split_bucket: Optional[bool] = True,
    speed_factor: Optional[float] = 1.0,
    fragment_interval: Optional[float] = 0.3,
    seed: Optional[int] = -1,
    media_type: Optional[str] = "wav",
    streaming_mode: Optional[bool] = False,
    parallel_infer: Optional[bool] = True,
    repetition_penalty: Optional[float] = 1.35,
    bert_base_path: Optional[str] = None,
    cnhuhbert_base_path: Optional[str] = None,
    device: Optional[str] = None,
    is_half: Optional[bool] = None,
    t2s_weights_path: Optional[str] = None,
    version: Optional[str] = None,
    vits_weights_path: Optional[str] = None,
    t2s_model_path: Optional[str] = None,
    vits_model_path: Optional[str] = None,
    t2s_model_device: Optional[str] = None,
    vits_model_device: Optional[str] = None
 ):
    req = {
        "text": text,
        "role": role,
        "text_lang": text_lang.lower(),
        "ref_audio_path": ref_audio_path,
        "aux_ref_audio_paths": aux_ref_audio_paths,
        "prompt_lang": prompt_lang.lower() if prompt_lang else None,
        "prompt_text": prompt_text,
        "emotion": emotion,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temperature,
        "text_split_method": text_split_method,
        "batch_size": batch_size,
        "batch_threshold": batch_threshold,
        "split_bucket": split_bucket,
        "speed_factor": speed_factor,
        "fragment_interval": fragment_interval,
        "seed": seed,
        "media_type": media_type,
        "streaming_mode": streaming_mode,
        "parallel_infer": parallel_infer,
        "repetition_penalty": repetition_penalty,
        "bert_base_path": bert_base_path,
        "cnhuhbert_base_path": cnhuhbert_base_path,
        "device": device,
        "is_half": is_half,
        "t2s_weights_path": t2s_weights_path,
        "version": version,
        "vits_weights_path": vits_weights_path,
        "t2s_model_path": t2s_model_path,
        "vits_model_path": vits_model_path,
        "t2s_model_device": t2s_model_device,
        "vits_model_device": vits_model_device
    }
    return await tts_handle(req, is_ttsrole=True)
@APP.post("/ttsrole")
 async def ttsrole_post_endpoint(request: TTSRole_Request):
    req = request.dict(exclude_unset=True)
    if "text_lang" in req:
        req["text_lang"] = req["text_lang"].lower()
    if "prompt_lang" in req:
        req["prompt_lang"] = req["prompt_lang"].lower()
    return await tts_handle(req, is_ttsrole=True)
@APP.get("/set_gpt_weights")
 async def set_gpt_weights(weights_path: str = None):
    try:
        if not weights_path:
            return JSONResponse(status_code=400, content={"status": "error", "message": "gpt weight path is required"})
        tts_pipeline.init_t2s_weights(weights_path)
        tts_config.t2s_weights_path = weights_path
        return JSONResponse(status_code=200, content={"status": "success", "message": "success"})
    except Exception as e:
        return JSONResponse(status_code=400, content={"status": "error", "message": f"change gpt weight failed", "exception": str(e)})
@APP.get("/set_sovits_weights")
async def set_sovits_weights(weights_path: str = None):
    """Handle ``GET /set_sovits_weights``: load new SoVITS (vits) weights.

    Args:
        weights_path: Path handed to ``tts_pipeline.init_vits_weights``.

    Returns:
        A ``JSONResponse`` with status 200 on success, or status 400 when
        ``weights_path`` is missing/empty or when loading raises (the
        exception text is then returned under the ``"exception"`` key).
    """
    # Guard clause: reject a missing or empty path before touching the pipeline.
    if not weights_path:
        return JSONResponse(status_code=400, content={"status": "error", "message": "sovits weight path is required"})

    try:
        tts_pipeline.init_vits_weights(weights_path)
        # The config is updated only after the pipeline accepted the weights.
        tts_config.vits_weights_path = weights_path
        return JSONResponse(status_code=200, content={"status": "success", "message": "success"})
    except Exception as err:
        failure = {
            "status": "error",
            "message": "change sovits weight failed",
            "exception": str(err),
        }
        return JSONResponse(status_code=400, content=failure)
@APP.get("/set_refer_audio")
async def set_refer_audio(refer_audio_path: str = None):
    """Handle ``GET /set_refer_audio``: set the pipeline's reference audio.

    Args:
        refer_audio_path: Path handed to ``tts_pipeline.set_ref_audio``.

    Returns:
        A ``JSONResponse`` with status 200 on success, or status 400 when
        ``refer_audio_path`` is missing/empty or when the pipeline call
        raises (the exception text is then returned under ``"exception"``).
    """
    # Guard clause: reject a missing or empty path before touching the pipeline.
    if not refer_audio_path:
        return JSONResponse(status_code=400, content={"status": "error", "message": "refer audio path is required"})

    try:
        tts_pipeline.set_ref_audio(refer_audio_path)
        return JSONResponse(status_code=200, content={"status": "success", "message": "success"})
    except Exception as err:
        failure = {
            "status": "error",
            "message": "set refer audio failed",
            "exception": str(err),
        }
        return JSONResponse(status_code=400, content=failure)
 if __name__ == "__main__":
    # Script entry point: serve APP with uvicorn using a single worker.
    try:
        # Original note: invoking with `-a None` lets the API listen on dual stack.
        # The literal string 'None' is turned into a real None before uvicorn sees it.
        if host == 'None':
            host = None
        uvicorn.run(app=APP, host=host, port=port, workers=1)
    except Exception as e:
        # Print the traceback, then send SIGTERM to this very process.
        traceback.print_exc()
        os.kill(os.getpid(), signal.SIGTERM)
        # NOTE(review): only reached if the SIGTERM above did not end the process;
        # the exit status is 0 even though an exception occurred - confirm this is intended.
        exit(0)
--- a/api_role_v3.py
+++ b/api_role_v3.py
--- a/docs/cn/Changelog_CN.md
+++ b/docs/cn/Changelog_CN.md
@ -594,11 +594,11 @@
  - 内容: 修复实验名结尾出现空格在win中路径不正确的问题
  - 类型: 修复
  - 提交: RVC-Boss
- 2025.06.10 [Commit#746cb536](https://github.com/RVC-Boss/GPT-SoVITS/commit/746cb536c68b1fe6ce3ca7e882235375b8a8dd89)
+- 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
  - 内容: 语种分割优化
  - 类型: 优化
  - 提交: KamioRinn
- 2025.06.11 [Commit#dd2b9253](https://github.com/RVC-Boss/GPT-SoVITS/commit/dd2b9253aabb09db32db7a3344570ed9df043351)
+- 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
  - 内容: 修复并行推理对v2pro支持bug
  - 类型: 修复
  - 提交: YYuX-1145
@ -606,21 +606,132 @@
  - 内容: v2pro对ge提取时会出现数值溢出的问题修复
  - 类型: 修复
  - 提交: RVC-Boss
- 2025.06.11 [Commit#37f5abfc](https://github.com/RVC-Boss/GPT-SoVITS/commit/6fdc67ca83418306f11e90b9139278313ac5c3e9)[Commit#6fdc67ca](https://github.com/RVC-Boss/GPT-SoVITS/commit/37f5abfcb4a6553652235909db2e124b6f8ff3a5)
+- 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
  - 内容: install.sh逻辑优化
  - 类型: 优化
  - 提交: XXXXRT666
- 2025.06.27 [Commit#90ebefa7](https://github.com/RVC-Boss/GPT-SoVITS/commit/90ebefa78fd544da36eebe0b2003620879c921b0)
+- 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
  - 内容: onnxruntime加载逻辑优化（对gpu/cpu的判断）
  - 类型: 优化
  - 提交: KamioRinn
- 2025.06.27 [Commit#6df61f58](https://github.com/RVC-Boss/GPT-SoVITS/commit/6df61f58e4d18d4c2ad9d1eddd6a1bd690034c23)
+- 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
  - 内容: 语言分割及格式化优化
  - 类型: 优化
  - 提交: KamioRinn
 ## 202507
 - 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
  - 内容: 提升推理进程优先级（修复win11下可能GPU利用率受限的问题）
-  - 类型: 修复
+  - 类型: 优化
  - 提交: XianYue0125
 - 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
  - 内容: 解决 TTS.py 无法识别真正支持版本 v2Pro、v2ProPlus 的问题, 同时更新一版默认配置。
  - 类型: 修复
  - 提交: jiangsier-xyz
 - 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
  - 内容: 修复并行推理模式下v2pro模型识别问题
  - 类型: 修复
  - 提交: RVC-Boss
 - 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
  - 内容: whisper asr支持性价比更高的distill模型
  - 类型: 优化
  - 提交: XXXXRT666
 - 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
  - 内容: 优化TTS_Config的代码逻辑
  - 类型: 优化
  - 提交: ChasonJiang
 - 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
  - 内容: 修复gpt的loss计算问题
  - 类型: 修复
  - 提交: ChasonJiang
 ## 202508
 - 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
  - 内容: WSL Rocm
  - 类型: 修复
  - 提交: XXXXRT666
 ## 202509
 - 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
  - 内容: 修复环境变量可能不为str的问题
  - 类型: 修复
  - 提交: RVC-Boss
 ## 202511
 - 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
  - 内容: 流式推理
  - 类型: 新功能
  - 提交: ChasonJiang
 - 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
  - 内容: 数学计算文本前端逻辑优化
  - 类型: 优化
  - 提交: KamioRinn
 - 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
  - 内容: 流式推理
  - 类型: 新功能
  - 提交: L-jasmine
 - 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
  - 内容: 支持vq分布式训练
  - 类型: 优化
  - 提交: wzy3650
 - 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
  - 内容: ASR模型下载逻辑优化
  - 类型: 优化
  - 提交: XXXXRT666
 - 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
  - 内容: default batch size bug 修复
  - 类型: 修复
  - 提交: Spr-Aachen
 ## 202512
 - 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
  - 内容: 修复采样错误
  - 类型: 修复
  - 提交: ChasonJiang
 ## 202602
 - 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
  - 内容: 修复 Conda 条款未同意导致的构建失败
  - 类型: 修复
  - 提交: Oarora
 - 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
  - 内容: 环境自动构建优化
  - 类型: 优化
  - 提交: XXXXRT666
 ## 202604
 - 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
  - 内容: 优化 G2PW 的推理输入构造与多音字处理流程，减少重复计算，降低长句场景下的推理开销
  - 类型: 优化
  - 提交: baicai-1145
 - 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
  - 内容: 改进 Windows 单卡 v3 LoRA 训练流程
  - 类型: 优化
  - 提交: 2409324124
 - 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
  - 内容: 修复多个模块中的独立 bug
  - 类型: 修复
  - 提交: wishhyt
 - 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
  - 内容: 添加数据集的错误处理提示
  - 类型: 优化
  - 提交: mushroomcowisheggs
 - 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
  - 内容: 并行推理部分bug修复
  - 类型: 修复
  - 提交: wishhyt
 - 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
  - 内容: bug修复：DPO 训练不支持漏字模拟
  - 类型: 修复
  - 提交: Mr-Neutr0n
 - 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
  - 内容: 修复onnx脚本未导入Optional等的问题
  - 类型: 修复
  - 提交: RVC-Boss
--- a/docs/en/Changelog_EN.md
+++ b/docs/en/Changelog_EN.md
@ -578,3 +578,160 @@
  - Content: Optimized automatic precision detection logic; added collapsible functionality to WebUI frontend modules.
  - Type: New Feature
  - Contributors: XXXXRT666, RVC-Boss
 - 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
  - Content: Fix polyphone detection for "X一X" pattern
  - Type: Fix
  - Contributor: wzy3650
 - 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
  - Content: Config fix; fix SoVITS model loading
  - Type: Fix
  - Contributor: wzy3650
 - 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
  - Content: Fix possible numerical explosion of `ge.sum` causing silent inference
  - Type: Fix
  - Contributor: RVC-Boss
 - 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
  - Content: Fix incorrect Windows path when experiment name ends with a space
  - Type: Fix
  - Contributor: RVC-Boss
 - 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
  - Content: Optimize language segmentation
  - Type: Optimization
  - Contributor: KamioRinn
 - 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
  - Content: Fix bug in parallel inference support for v2pro
  - Type: Fix
  - Contributor: YYuX-1145
 - 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
  - Content: Fix numerical overflow issue when extracting `ge` for v2pro
  - Type: Fix
  - Contributor: RVC-Boss
 - 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
  - Content: Optimize `install.sh` logic
  - Type: Optimization
  - Contributor: XXXXRT666
 - 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
  - Content: Optimize onnxruntime loading logic (GPU/CPU detection)
  - Type: Optimization
  - Contributor: KamioRinn
 - 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
  - Content: Optimize language segmentation and formatting
  - Type: Optimization
  - Contributor: KamioRinn
 ## 202507
 - 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
  - Content: Increase inference process priority (fix possible GPU utilization limitation on Win11)
  - Type: Optimization
  - Contributor: XianYue0125
 - 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
  - Content: Fix TTS.py not recognizing actually supported versions v2Pro and v2ProPlus, and update default configuration
  - Type: Fix
  - Contributor: jiangsier-xyz
 - 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
  - Content: Fix v2pro model recognition issue in parallel inference mode
  - Type: Fix
  - Contributor: RVC-Boss
 - 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
  - Content: Whisper ASR supports more cost-effective distill models
  - Type: Optimization
  - Contributor: XXXXRT666
 - 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
  - Content: Optimize `TTS_Config` code logic
  - Type: Optimization
  - Contributor: ChasonJiang
 - 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
  - Content: Fix GPT loss calculation issue
  - Type: Fix
  - Contributor: ChasonJiang
 ## 202508
 - 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
  - Content: WSL ROCm
  - Type: Fix
  - Contributor: XXXXRT666
 ## 202509
 - 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
  - Content: Fix issue where environment variable may not be a string
  - Type: Fix
  - Contributor: RVC-Boss
 ## 202511
 - 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
  - Content: Streaming inference
  - Type: New Feature
  - Contributor: ChasonJiang
 - 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
  - Content: Optimize text frontend logic for mathematical expression text
  - Type: Optimization
  - Contributor: KamioRinn
 - 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
  - Content: Streaming inference
  - Type: New Feature
  - Contributor: L-jasmine
 - 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
  - Content: Support VQ distributed training
  - Type: Optimization
  - Contributor: wzy3650
 - 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
  - Content: Optimize ASR model download logic
  - Type: Optimization
  - Contributor: XXXXRT666
 - 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
  - Content: Fix default batch size bug
  - Type: Fix
  - Contributor: Spr-Aachen
 ## 202512
 - 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
  - Content: Fix sampling error
  - Type: Fix
  - Contributor: ChasonJiang
 ## 202602
 - 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
  - Content: Fix build failure caused by unaccepted Conda terms
  - Type: Fix
  - Contributor: Oarora
 - 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
  - Content: Optimize automatic environment setup
  - Type: Optimization
  - Contributor: XXXXRT666
 ## 202604
 - 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
  - Content: Optimize G2PW inference input construction and polyphone handling to reduce redundant computation and inference overhead for long sentences
  - Type: Optimization
  - Contributor: baicai-1145
 - 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
  - Content: Improve the single-GPU LoRA training flow for GPT-SoVITS v3 on Windows
  - Type: Optimization
  - Contributor: 2409324124
 - 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
  - Content: Fix miscellaneous bugs in multiple modules
  - Type: Fix
  - Contributor: wishhyt
 - 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
  - Content: Add error handling hints for dataset processing
  - Type: Optimization
  - Contributor: mushroomcowisheggs
 - 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
  - Content: Fix some bugs in parallel inference
  - Type: Fix
  - Contributor: wishhyt
 - 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
  - Content: Fix bug where DPO training does not support missing word simulation
  - Type: Fix
  - Contributor: Mr-Neutr0n
 - 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
  - Content: Fix missing imports (e.g., Optional) in ONNX script
  - Type: Fix
  - Contributor: RVC-Boss
--- a/docs/ja/Changelog_JA.md
+++ b/docs/ja/Changelog_JA.md
@ -578,3 +578,160 @@
  - 内容: 自動精度検出ロジックを最適化し、WebUI フロントエンドモジュールに折り畳み（Collapsible）機能を追加
  - タイプ: 新機能
  - 貢献者: XXXXRT666, RVC-Boss
 - 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
  - 内容: 「X一X」パターンの多音字検出を修正
  - タイプ: 修正
  - 貢献者: wzy3650
 - 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
  - 内容: 設定の修正；SoVITSモデル読み込みの修正
  - タイプ: 修正
  - 貢献者: wzy3650
 - 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
  - 内容: `ge.sum`の数値爆発による推論の無音化を修正
  - タイプ: 修正
  - 貢献者: RVC-Boss
 - 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
  - 内容: 実験名がスペースで終わる場合のWindowsパスの誤りを修正
  - タイプ: 修正
  - 貢献者: RVC-Boss
 - 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
  - 内容: 言語分割の最適化
  - タイプ: 最適化
  - 貢献者: KamioRinn
 - 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
  - 内容: v2proの並列推論対応におけるバグを修正
  - タイプ: 修正
  - 貢献者: YYuX-1145
 - 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
  - 内容: v2proの`ge`抽出時の数値オーバーフロー問題を修正
  - タイプ: 修正
  - 貢献者: RVC-Boss
 - 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
  - 内容: `install.sh`のロジックを最適化
  - タイプ: 最適化
  - 貢献者: XXXXRT666
 - 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
  - 内容: onnxruntime読み込みロジックを最適化（GPU/CPU検出）
  - タイプ: 最適化
  - 貢献者: KamioRinn
 - 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
  - 内容: 言語分割と書式を最適化
  - タイプ: 最適化
  - 貢献者: KamioRinn
 ## 202507
 - 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
  - 内容: 推論プロセスの優先度を上げる（Win11でのGPU利用制限の可能性を修正）
  - タイプ: 最適化
  - 貢献者: XianYue0125
 - 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
  - 内容: TTS.pyが実際にサポートされているバージョンv2Proおよびv2ProPlusを認識しない問題を修正し、デフォルト設定を更新
  - タイプ: 修正
  - 貢献者: jiangsier-xyz
 - 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
  - 内容: 並列推論モードでのv2proモデル認識問題を修正
  - タイプ: 修正
  - 貢献者: RVC-Boss
 - 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
  - 内容: Whisper ASRがよりコスト効率の高い蒸留モデルをサポート
  - タイプ: 最適化
  - 貢献者: XXXXRT666
 - 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
  - 内容: `TTS_Config`のコードロジックを最適化
  - タイプ: 最適化
  - 貢献者: ChasonJiang
 - 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
  - 内容: GPT損失計算の問題を修正
  - タイプ: 修正
  - 貢献者: ChasonJiang
 ## 202508
 - 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
  - 内容: WSL Rocm対応
  - タイプ: 修正
  - 貢献者: XXXXRT666
 ## 202509
 - 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
  - 内容: 環境変数が文字列でない可能性がある問題を修正
  - タイプ: 修正
  - 貢献者: RVC-Boss
 ## 202511
 - 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
  - 内容: ストリーミング推論
  - タイプ: 新機能
  - 貢献者: ChasonJiang
 - 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
  - 内容: 数式テキストに対するテキスト前処理ロジックを最適化
  - タイプ: 最適化
  - 貢献者: KamioRinn
 - 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
  - 内容: ストリーミング推論
  - タイプ: 新機能
  - 貢献者: L-jasmine
 - 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
  - 内容: VQ分散学習をサポート
  - タイプ: 最適化
  - 貢献者: wzy3650
 - 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
  - 内容: ASRモデルダウンロードロジックを最適化
  - タイプ: 最適化
  - 貢献者: XXXXRT666
 - 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
  - 内容: デフォルトのバッチサイズのバグを修正
  - タイプ: 修正
  - 貢献者: Spr-Aachen
 ## 202512
 - 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
  - 内容: サンプリングエラーを修正
  - タイプ: 修正
  - 貢献者: ChasonJiang
 ## 202602
 - 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
  - 内容: Conda利用規約への未同意が原因のビルド失敗を修正
  - タイプ: 修正
  - 貢献者: Oarora
 - 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
  - 内容: 自動環境セットアップを最適化
  - タイプ: 最適化
  - 貢献者: XXXXRT666
 ## 202604
 - 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
  - 内容: G2PW推論入力の構築と多音字処理を最適化し、長文における冗長な計算と推論オーバーヘッドを削減
  - タイプ: 最適化
  - 貢献者: baicai-1145
 - 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
  - 内容: WindowsでのシングルカードにおけるGPT-SoVITS v3のLoRAトレーニングフローを改善
  - タイプ: 最適化
  - 貢献者: 2409324124
 - 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
  - 内容: 複数モジュールの雑多なバグを修正
  - タイプ: 修正
  - 貢献者: wishhyt
 - 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
  - 内容: データセット処理時のエラーハンドリングヒントを追加
  - タイプ: 最適化
  - 貢献者: mushroomcowisheggs
 - 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
  - 内容: 並列推論の一部バグを修正
  - タイプ: 修正
  - 貢献者: wishhyt
 - 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
  - 内容: DPOトレーニングが欠落単語シミュレーションをサポートしないバグを修正
  - タイプ: 修正
  - 貢献者: Mr-Neutr0n
 - 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
  - 内容: ONNXスクリプトでの（Optionalなどの）不足インポートを修正
  - タイプ: 修正
  - 貢献者: RVC-Boss
--- a/docs/ko/Changelog_KO.md
+++ b/docs/ko/Changelog_KO.md
@ -578,3 +578,160 @@
  - 내용: 자동 정밀도 감지 로직 최적화; WebUI 프론트엔드 모듈에 접기 기능 추가
  - 유형: 신규 기능
  - 기여자: XXXXRT666, RVC-Boss
 - 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
  - 내용: "X一X" 패턴의 다중 발음 감지 오류 수정
  - 유형: 수정
  - 기여자: wzy3650
 - 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
  - 내용: 설정 오류 수정; SoVITS 모델 로딩 오류 수정
  - 유형: 수정
  - 기여자: wzy3650
 - 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
  - 내용: `ge.sum`의 수치 폭발 가능성으로 인한 추론 무음 현상 수정
  - 유형: 수정
  - 기여자: RVC-Boss
 - 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
  - 내용: 실험 이름이 공백으로 끝날 때 발생하는 잘못된 Windows 경로 문제 수정
  - 유형: 수정
  - 기여자: RVC-Boss
 - 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
  - 내용: 언어 분할 최적화
  - 유형: 최적화
  - 기여자: KamioRinn
 - 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
  - 내용: v2pro 병렬 추론 지원 버그 수정
  - 유형: 수정
  - 기여자: YYuX-1145
 - 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
  - 내용: v2pro의 `ge` 추출 시 수치 오버플로우 문제 수정
  - 유형: 수정
  - 기여자: RVC-Boss
 - 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
  - 내용: `install.sh` 로직 최적화
  - 유형: 최적화
  - 기여자: XXXXRT666
 - 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
  - 내용: onnxruntime 로딩 로직 최적화 (GPU/CPU 감지)
  - 유형: 최적화
  - 기여자: KamioRinn
 - 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
  - 내용: 언어 분할 및 형식 최적화
  - 유형: 최적화
  - 기여자: KamioRinn
 ## 202507
 - 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
  - 내용: 추론 프로세스 우선순위 증가 (Win11에서 GPU 활용 제한 가능성 수정)
  - 유형: 최적화
  - 기여자: XianYue0125
 - 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
  - 내용: TTS.py가 실제 지원되는 버전 v2Pro 및 v2ProPlus를 인식하지 못하는 문제 수정 및 기본 설정 업데이트
  - 유형: 수정
  - 기여자: jiangsier-xyz
 - 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
  - 내용: 병렬 추론 모드에서 v2pro 모델 인식 문제 수정
  - 유형: 수정
  - 기여자: RVC-Boss
 - 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
  - 내용: Whisper ASR이 더 비용 효율적인 distill 모델 지원
  - 유형: 최적화
  - 기여자: XXXXRT666
 - 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
  - 내용: `TTS_Config` 코드 로직 최적화
  - 유형: 최적화
  - 기여자: ChasonJiang
 - 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
  - 내용: GPT 손실(loss) 계산 문제 수정
  - 유형: 수정
  - 기여자: ChasonJiang
 ## 202508
 - 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
  - 내용: WSL Rocm
  - 유형: 수정
  - 기여자: XXXXRT666
 ## 202509
 - 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
  - 내용: 환경 변수가 문자열이 아닐 수 있는 문제 수정
  - 유형: 수정
  - 기여자: RVC-Boss
 ## 202511
 - 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
  - 내용: 스트리밍 추론
  - 유형: 신규 기능
  - 기여자: ChasonJiang
 - 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
  - 내용: 수학 표현식 텍스트에 대한 텍스트 전처리 로직 최적화
  - 유형: 최적화
  - 기여자: KamioRinn
 - 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
  - 내용: 스트리밍 추론
  - 유형: 신규 기능
  - 기여자: L-jasmine
 - 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
  - 내용: VQ 분산 학습 지원
  - 유형: 최적화
  - 기여자: wzy3650
 - 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
  - 내용: ASR 모델 다운로드 로직 최적화
  - 유형: 최적화
  - 기여자: XXXXRT666
 - 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
  - 내용: 기본 배치 크기 버그 수정
  - 유형: 수정
  - 기여자: Spr-Aachen
 ## 202512
 - 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
  - 내용: 샘플링 오류 수정
  - 유형: 수정
  - 기여자: ChasonJiang
 ## 202602
 - 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
  - 내용: Conda 약관 미동의로 인한 빌드 실패 수정
  - 유형: 수정
  - 기여자: Oarora
 - 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
  - 내용: 자동 환경 설정 최적화
  - 유형: 최적화
  - 기여자: XXXXRT666
 ## 202604
 - 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
  - 내용: G2PW 추론 입력 구성 및 다중 발음 처리를 최적화하여 긴 문장에 대한 중복 계산 및 추론 오버헤드 감소
  - 유형: 최적화
  - 기여자: baicai-1145
 - 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
  - 내용: Windows 환경 단일 GPU에서 GPT-SoVITS v3의 LoRA 학습 흐름 개선
  - 유형: 최적화
  - 기여자: 2409324124
 - 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
  - 내용: 여러 모듈의 잡다한 버그 수정
  - 유형: 수정
  - 기여자: wishhyt
 - 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
  - 내용: 데이터셋 처리를 위한 오류 처리 힌트 추가
  - 유형: 최적화
  - 기여자: mushroomcowisheggs
 - 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
  - 내용: 병렬 추론의 일부 버그 수정
  - 유형: 수정
  - 기여자: wishhyt
 - 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
  - 내용: DPO 학습이 누락 단어 시뮬레이션을 지원하지 않는 버그 수정
  - 유형: 수정
  - 기여자: Mr-Neutr0n
 - 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
  - 내용: ONNX 스크립트에서 Optional 등 누락된 임포트 문제 수정
  - 유형: 수정
  - 기여자: RVC-Boss
--- a/docs/tr/Changelog_TR.md
+++ b/docs/tr/Changelog_TR.md
@ -2,8 +2,6 @@
 ## 202401
 ## 202401
 - 2024.01.21 [PR#108](https://github.com/RVC-Boss/GPT-SoVITS/pull/108)
  - İçerik: WebUI'ya İngilizce sistem çeviri desteği eklendi.
  - Tür: Dokümantasyon
@ -332,6 +330,8 @@
  - Tür: Optimizasyon
  - Katkıda Bulunan: RVC-Boss, GoHomeToMacDonal
  - İlgili: [PR#672](https://github.com/RVC-Boss/GPT-SoVITS/pull/672)
 - Gelecek güncellemeler, `fast_inference` dalındaki değişikliklerin tutarlılığını doğrulamaya devam edecek.
 - 2024.07.13 [PR#1294](https://github.com/RVC-Boss/GPT-SoVITS/pull/1294), [PR#1298](https://github.com/RVC-Boss/GPT-SoVITS/pull/1298)
  - İçerik: i18n taraması yeniden düzenlendi ve çok dilli yapılandırma dosyaları güncellendi
  - Tür: Dokümantasyon
@ -578,3 +578,160 @@
  - İçerik: Otomatik hassasiyet algılama mantığı optimize edildi; WebUI önyüz modüllerine katlanabilir özellik eklendi
  - Tür: Yeni Özellik
  - Katkıda Bulunanlar: XXXXRT666, RVC-Boss
 - 2025.06.06 [PR#2427](https://github.com/RVC-Boss/GPT-SoVITS/pull/2427)
  - İçerik: "X一X" kalıbı için çok sesli harf tespitini düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: wzy3650
 - 2025.06.05 [PR#2439](https://github.com/RVC-Boss/GPT-SoVITS/pull/2439)
  - İçerik: Yapılandırma düzeltmesi; SoVITS model yüklemesini düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: wzy3650
 - 2025.06.09 [Commit#8056efe4](https://github.com/RVC-Boss/GPT-SoVITS/commit/8056efe4ab7bbc3610c72ae356a6f37518441f7d)
  - İçerik: `ge.sum` kaynaklı olası sayısal patlamayı (sessiz çıkarıma yol açan) düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: RVC-Boss
 - 2025.06.10 [Commit#2c0436b9](https://github.com/RVC-Boss/GPT-SoVITS/commit/2c0436b9ce397424ae03476c836fb64c6e5ebcc6)
  - İçerik: Deney adı boşlukla bittiğinde oluşan hatalı Windows yolunu düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: RVC-Boss
 - 2025.06.10 [PR#2449](https://github.com/RVC-Boss/GPT-SoVITS/pull/2449)
  - İçerik: Dil bölütlemeyi optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: KamioRinn
 - 2025.06.11 [PR#2450](https://github.com/RVC-Boss/GPT-SoVITS/pull/2450)
  - İçerik: v2pro için paralel çıkarım desteğindeki hatayı düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: YYuX-1145
 - 2025.06.11 [Commit#ed89a023](https://github.com/RVC-Boss/GPT-SoVITS/commit/ed89a023378dabba9d4b6580235bb9742245816d)
  - İçerik: v2pro için `ge` çıkarımındaki sayısal taşma sorununu düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: RVC-Boss
 - 2025.06.17 [PR#2464](https://github.com/RVC-Boss/GPT-SoVITS/pull/2464) [PR#2482](https://github.com/RVC-Boss/GPT-SoVITS/pull/2482)
  - İçerik: `install.sh` mantığını optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: XXXXRT666
 - 2025.06.27 [PR#2489](https://github.com/RVC-Boss/GPT-SoVITS/pull/2489)
  - İçerik: onnxruntime yükleme mantığını optimize et (GPU/CPU algılama)
  - Tür: Optimizasyon
  - Katkıda Bulunan: KamioRinn
 - 2025.06.27 [PR#2488](https://github.com/RVC-Boss/GPT-SoVITS/pull/2488)
  - İçerik: Dil bölütleme ve biçimlendirmeyi optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: KamioRinn
 ## 202507
 - 2025.07.10 [Commit#426e1a2bb](https://github.com/RVC-Boss/GPT-SoVITS/commit/426e1a2bb43614af2479b877c37acfb0591e952f)
  - İçerik: Çıkarım işlem önceliğini artır (Win11'de olası GPU kullanım sınırlamasını düzelt)
  - Tür: Optimizasyon
  - Katkıda Bulunan: XianYue0125
 - 2025.07.16 [PR#2490](https://github.com/RVC-Boss/GPT-SoVITS/pull/2490)
  - İçerik: TTS.py'nin gerçekte desteklenen sürümler olan v2Pro ve v2ProPlus'ı tanımaması sorununu düzelt ve varsayılan yapılandırmayı güncelle
  - Tür: Düzeltme
  - Katkıda Bulunan: jiangsier-xyz
 - 2025.07.16 [Commit#4d8ebf85](https://github.com/RVC-Boss/GPT-SoVITS/commit/4d8ebf85233d4f1166d7cc02fdc595602975ca8f)
  - İçerik: Paralel çıkarım modunda v2pro model tanıma sorununu düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: RVC-Boss
 - 2025.07.17 [PR#2531](https://github.com/RVC-Boss/GPT-SoVITS/pull/2531)
  - İçerik: Whisper ASR daha uygun maliyetli distill modellerini destekler
  - Tür: Optimizasyon
  - Katkıda Bulunan: XXXXRT666
 - 2025.07.18 [PR#2536](https://github.com/RVC-Boss/GPT-SoVITS/pull/2536)
  - İçerik: `TTS_Config` kod mantığını optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: ChasonJiang
 - 2025.07.18 [PR#2537](https://github.com/RVC-Boss/GPT-SoVITS/pull/2537)
  - İçerik: GPT kayıp (loss) hesaplama sorununu düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: ChasonJiang
 ## 202508
 - 2025.08.02 [PR#2561](https://github.com/RVC-Boss/GPT-SoVITS/pull/2561)
  - İçerik: WSL Rocm
  - Tür: Düzeltme
  - Katkıda Bulunan: XXXXRT666
 ## 202509
 - 2025.09.10 [Commit#11aa78bd](https://github.com/RVC-Boss/GPT-SoVITS/commit/11aa78bd9bda8b53047cfcae03abf7ca94d27391)
  - İçerik: Ortam değişkeninin dize (string) olmaması sorununu düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: RVC-Boss
 ## 202511
 - 2025.11.28 [PR#2671](https://github.com/RVC-Boss/GPT-SoVITS/pull/2671) [PR#2678](https://github.com/RVC-Boss/GPT-SoVITS/pull/2678)
  - İçerik: Akışlı çıkarım (streaming inference)
  - Tür: Yeni Özellik
  - Katkıda Bulunan: ChasonJiang
 - 2025.11.28 [PR#2636](https://github.com/RVC-Boss/GPT-SoVITS/pull/2636)
  - İçerik: Matematiksel ifade metinleri için metin ön uç (frontend) mantığını optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: KamioRinn
 - 2025.11.28 [PR#2469](https://github.com/RVC-Boss/GPT-SoVITS/pull/2469)
  - İçerik: Akışlı çıkarım (streaming inference)
  - Tür: Yeni Özellik
  - Katkıda Bulunan: L-jasmine
 - 2025.11.28 [PR#2577](https://github.com/RVC-Boss/GPT-SoVITS/pull/2577)
  - İçerik: VQ dağıtılmış eğitimi destekle
  - Tür: Optimizasyon
  - Katkıda Bulunan: wzy3650
 - 2025.11.28 [PR#2627](https://github.com/RVC-Boss/GPT-SoVITS/pull/2627) [PR#2679](https://github.com/RVC-Boss/GPT-SoVITS/pull/2679)
  - İçerik: ASR model indirme mantığını optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: XXXXRT666
 - 2025.11.28 [PR#2662](https://github.com/RVC-Boss/GPT-SoVITS/pull/2662)
  - İçerik: Varsayılan parti boyutu (batch size) hatasını düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: Spr-Aachen
 ## 202512
 - 2025.12.30 [PR#2703](https://github.com/RVC-Boss/GPT-SoVITS/pull/2703) [PR#2704](https://github.com/RVC-Boss/GPT-SoVITS/pull/2704)
  - İçerik: Örnekleme (sampling) hatasını düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: ChasonJiang
 ## 202602
 - 2026.02.08 [PR#2727](https://github.com/RVC-Boss/GPT-SoVITS/pull/2727)
  - İçerik: Kabul edilmeyen Conda koşullarının neden olduğu derleme hatasını düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: Oarora
 - 2026.02.09 [PR#2732](https://github.com/RVC-Boss/GPT-SoVITS/pull/2732)
  - İçerik: Otomatik ortam kurulumunu optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: XXXXRT666
 ## 202604
 - 2026.04.18 [PR#2763](https://github.com/RVC-Boss/GPT-SoVITS/pull/2763)
  - İçerik: Uzun cümlelerde gereksiz hesaplama ve çıkarım yükünü azaltmak için G2PW çıkarım girdi oluşturmayı ve çok sesli harf işlemeyi optimize et
  - Tür: Optimizasyon
  - Katkıda Bulunan: baicai-1145
 - 2026.04.18 [PR#2767](https://github.com/RVC-Boss/GPT-SoVITS/pull/2767)
  - İçerik: Windows altında tek kartta GPT-SoVITS v3 için LoRA eğitim akışını iyileştir
  - Tür: Optimizasyon
  - Katkıda Bulunan: 2409324124
 - 2026.04.18 [PR#2755](https://github.com/RVC-Boss/GPT-SoVITS/pull/2755)
  - İçerik: Birden çok modüldeki çeşitli hataları düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: wishhyt
 - 2026.04.18 [PR#2758](https://github.com/RVC-Boss/GPT-SoVITS/pull/2758)
  - İçerik: Veri kümesi işleme için hata işleme ipuçları ekle
  - Tür: Optimizasyon
  - Katkıda Bulunan: mushroomcowisheggs
 - 2026.04.18 [PR#2753](https://github.com/RVC-Boss/GPT-SoVITS/pull/2753)
  - İçerik: Paralel çıkarımdaki bazı hataları düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: wishhyt
 - 2026.04.18 [PR#2733](https://github.com/RVC-Boss/GPT-SoVITS/pull/2733)
  - İçerik: DPO eğitiminin eksik kelime simülasyonunu desteklememe hatasını düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: Mr-Neutr0n
 - 2026.04.18 [Commit#02425ea](https://github.com/RVC-Boss/GPT-SoVITS/commit/02425ea25680c26c700be0bc158756c69103d827)
  - İçerik: ONNX betiğinde (Optional vb.) eksik içe aktarmaları düzelt
  - Tür: Düzeltme
  - Katkıda Bulunan: RVC-Boss
--- a/tools/asr/funasr_asr.py
+++ b/tools/asr/funasr_asr.py
@ -39,6 +39,7 @@ def create_model(language="zh"):
            local_dir="tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
        )
        model_revision = "v2.0.4"
        vad_model_revision = punc_model_revision = "v2.0.4"
    elif language == "yue":
        path_asr = "tools/asr/models/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online"
        snapshot_download(
@ -51,8 +52,6 @@ def create_model(language="zh"):
    else:
        raise ValueError(f"{language} is not supported")
    vad_model_revision = punc_model_revision = "v2.0.4"
    if language in funasr_models:
        return funasr_models[language]
    else:
--- a/tools/uvr5/lib/lib_v5/spec_utils.py
+++ b/tools/uvr5/lib/lib_v5/spec_utils.py
@ -485,6 +485,8 @@ def istft(spec, hl):
    wave_right = librosa.istft(spec_right, hop_length=hl)
    wave = np.asfortranarray([wave_left, wave_right])
    return wave
 if __name__ == "__main__":
    import argparse
Author	SHA1	Message	Date
spawner	4a25469099	Merge c93bf48785aba24ee47a268f1eb5dcee1b8866dd into ea2d2a81667239d37615697e8f0056e35bab2db6	2026-04-20 10:26:26 +00:00
RVC-Boss	ea2d2a8166	Update README.md	2026-04-19 21:02:57 +08:00
SapphireLab	d9f03dad3e	Update Documentation (#2768 ) * 调整日志格式 * docs: Update other languages' changelogs	2026-04-18 22:33:55 +08:00
RVC-Boss	647935357a	Update Changelog_CN.md	2026-04-18 19:01:11 +08:00
RVC-Boss	02425ea256	Fixed issues such as missing imports for types like `Optional`. Fixed issues such as missing imports for types like `Optional`.	2026-04-18 17:33:53 +08:00
Harikrishna KP	938f05fce8	fix: correct torch.randint upper bound to include both values (#2733 )	2026-04-18 17:19:55 +08:00
huang yutong	445d18ccce	fix: 修复 TTS 音频后处理中的多个缺陷 (#2753 ) 1. 修复音频超采样时 int16 双重转换导致整数溢出（CRITICAL） - audio_postprocess 中 `audio = (audio * 32768).astype(np.int16)` 位于 if/else 块之外无条件执行，当 super_sampling=True 时音频已在分支内转为 int16，再次乘以 32768 导致溢出和音频完全失真 - 同时修复 super_sampling=True 但超分模型不存在时 torch.Tensor 调用 .astype() 的 AttributeError 2. 修复 batched vocoder 推理中 padding_len=0 导致音频丢失（HIGH） - 当 padding_len 恰好为 0 时，`-0 * upsample_rate == 0`，切片 `audio[x:0]` 返回空张量，导致整段音频丢失 3. 修复文件不存在时错误地抛出 FileExistsError（LOW） - 应为 FileNotFoundError Made-with: Cursor	2026-04-18 17:16:24 +08:00
Mushroomcowisheggs	00ce973412	feat: 添加数据集的错误处理提示 (#2758 ) Co-authored-by: moomushroom <107208254+moomushroom@users.noreply.github.com>	2026-04-18 17:13:30 +08:00
huang yutong	14191901cd	fix: 修复多个模块中的独立 bug (#2755 ) 1. 修复 sync_buffer 中除以函数对象而非调用结果（distrib.py） - `buffer.data /= world_size` 中 world_size 是函数，缺少 ()，导致 TypeError 使分布式训练 buffer 同步失败 2. 修复 istft 函数缺少 return 语句（spec_utils.py） - 函数计算了结果但未返回，调用者始终得到 None 3. 修复 cut0 返回字面量 "/n" 而非换行符 "\n"（text_segmentation_method.py） - 导致后续 text.split("\n") 无法正确切分，字面 /n 被当作文本内容 4. 修复粤语 ASR 的 vad/punc model_revision 被无条件覆盖（funasr_asr.py） - 粤语分支将 vad_model_revision 设为空（因不使用 VAD/标点模型），但 if/else 外的赋值将其覆盖为 "v2.0.4"，传入错误的 revision 参数 Made-with: Cursor	2026-04-18 17:10:56 +08:00
东云	780383d5bd	[codex] Improve Windows single-GPU v3 LoRA training / 改进 Windows 单卡 v3 LoRA 训练流程 (#2767 ) * Improve Windows single-GPU v3 LoRA training * Drop unrelated checkpoint helper change from PR * Tighten PR scope to single-GPU training path fixes	2026-04-18 16:54:26 +08:00
白菜工厂1145号员工	ba8de9b760	优化 G2PW 的推理输入构造与多音字处理流程，减少重复计算，降低长句场景下的推理开销 (#2763 ) * Enhance G2P processing by implementing batch input handling in _g2p function, improving efficiency. Update prepare_onnx_input to utilize caching for tokenization and add optional parameters for character ID mapping and phoneme masks. Refactor G2PWOnnxConverter to streamline model loading and configuration management. * Enhance G2PW model input handling by introducing polyphonic context character support and updating the data preparation method to return additional query IDs. This improves the processing of polyphonic characters in sentences.	2026-04-18 16:52:32 +08:00
spawner	c93bf48785	Merge pull request #1 from RVC-Boss/main 111	2025-03-30 19:47:31 +08:00
spawner	c9308ec96b	Update api_role_v3.py	2025-03-07 15:03:56 +08:00
spawner	87c521ea6e	Create api_role_v3.py	2025-03-07 14:57:51 +08:00
spawner	f19b76b26f	Add files via upload	2025-03-07 14:56:45 +08:00