From ccc7757880ea99149390d8d00542cfa017c9fa42 Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Sun, 11 Feb 2024 01:34:25 +0800 Subject: [PATCH 01/18] Adjust-asr --- tools/asr/config.py | 2 +- tools/asr/fasterwhisper_asr.py | 15 +++++++++++---- tools/asr/funasr_asr.py | 8 ++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tools/asr/config.py b/tools/asr/config.py index 025ef38..8fe6838 100644 --- a/tools/asr/config.py +++ b/tools/asr/config.py @@ -13,7 +13,7 @@ def check_fw_local_models(): "large-v2", "large-v3"] for i, size in enumerate(model_size_list): if os.path.exists(f'tools/asr/models/faster-whisper-{size}'): - model_size_list[i] = size + '(local)' + model_size_list[i] = size + '-local' return model_size_list asr_dict = { diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index fa07cf7..7055711 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -8,6 +8,7 @@ from faster_whisper import WhisperModel from tqdm import tqdm from tools.asr.config import check_fw_local_models +from tools.asr.funasr_asr import only_asr os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" @@ -35,8 +36,8 @@ language_code_list = [ "auto"] def execute_asr(input_folder, output_folder, model_size, language,precision): - if 'local' in model_size: - model_size = model_size.split('(')[0] + if '-local' in model_size: + model_size = model_size[:-6] model_path = f'tools/asr/models/faster-whisper-{model_size}' else: model_path = model_size @@ -63,8 +64,14 @@ def execute_asr(input_folder, output_folder, model_size, language,precision): vad_parameters = dict(min_silence_duration_ms=700), language = language) text = '' - for segment in segments: - text += segment.text + + if info.language == "zh": + print("检测为中文文本,转funasr处理") + text = only_asr(file) + + if text == '': + for segment in segments: + text += segment.text output.append(f"{file}|{output_file_name}|{info.language.upper()}|{text}") except: return print(traceback.format_exc()) diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index f6673b7..106d7d1 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -23,6 +23,14 @@ model = AutoModel( punc_model_revision = "v2.0.4", ) +def only_asr(input_file): + try: + text = model.generate(input=input_file)[0]["text"] + except: + text = '' + print(traceback.format_exc()) + return text + def execute_asr(input_folder, output_folder, model_size, language): input_file_names = os.listdir(input_folder) input_file_names.sort() From c767450bb1a2c88cd6396b43f5df869ac95227d3 Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Sun, 11 Feb 2024 03:54:39 +0800 Subject: [PATCH 02/18] Adjust-tts-clean-text --- GPT_SoVITS/inference_webui.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index fc8af08..a0263fa 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -245,7 +245,14 @@ def splite_en_inf(sentence, language): def clean_text_inf(text, language): - phones, word2ph, norm_text = clean_text(text, language.replace("all_","")) + formattext = "" + language = language.replace("all_","") + for tmp in LangSegment.getTexts(text): + if tmp["lang"] == language: + formattext += tmp["text"] + " " + while " " in formattext: + formattext = formattext.replace(" ", " ") + phones, word2ph, norm_text = clean_text(formattext, language) phones = cleaned_text_to_sequence(phones) return phones, word2ph, norm_text @@ -305,9 +312,8 @@ def nonen_get_bert_inf(text, language): print(langlist) bert_list = [] for i in range(len(textlist)): - text = textlist[i] lang = langlist[i] - phones, word2ph, norm_text = clean_text_inf(text, lang) + phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) bert = get_bert_inf(phones, word2ph, norm_text, lang) bert_list.append(bert) bert = torch.cat(bert_list, dim=1) From 070ac9b2b2d72c3c6c71139bf0e811276e479a32 Mon Sep 17 00:00:00 2001 From: liufenghua Date: Sun, 11 Feb 2024 15:06:01 +0800 Subject: [PATCH 03/18] add DPO training --- .gitignore | 3 +- GPT_SoVITS/AR/models/t2s_model.py | 99 ++++++++++++++++++++++++++++++- GPT_SoVITS/AR/models/utils.py | 69 ++++++++++++++++++++- GPT_SoVITS/inference_webui.py | 10 +++- 4 files changed, 176 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 3a239f8..00f6bb9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ runtime output logs reference -SoVITS_weights \ No newline at end of file +SoVITS_weights +GPT_weights \ No newline at end of file diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py index 083dc09..d3e550d 100644 --- a/GPT_SoVITS/AR/models/t2s_model.py +++ b/GPT_SoVITS/AR/models/t2s_model.py @@ -8,6 +8,9 @@ from AR.models.utils import ( sample, logits_to_probs, multinomial_sample_one_no_sync, + dpo_loss, + make_reject_y, + get_batch_logps ) from AR.modules.embedding import SinePositionalEmbedding from AR.modules.embedding import TokenEmbedding @@ -85,11 +88,104 @@ class Text2SemanticDecoder(nn.Module): ignore_index=self.EOS, ) + def make_input_data(self, x, x_lens, y, y_lens, bert_feature): + x = self.ar_text_embedding(x) + x = x + self.bert_proj(bert_feature.transpose(1, 2)) + x = self.ar_text_position(x) + x_mask = make_pad_mask(x_lens) + + y_mask = make_pad_mask(y_lens) + y_mask_int = y_mask.type(torch.int64) + codes = y.type(torch.int64) * (1 - y_mask_int) + + # Training + # AR Decoder + y, targets = self.pad_y_eos(codes, y_mask_int, eos_id=self.EOS) + x_len = x_lens.max() + y_len = y_lens.max() + y_emb = self.ar_audio_embedding(y) + y_pos = self.ar_audio_position(y_emb) + + xy_padding_mask = torch.concat([x_mask, y_mask], dim=1) + + ar_xy_padding_mask = xy_padding_mask + + x_attn_mask = F.pad( + torch.zeros((x_len, x_len), dtype=torch.bool, device=x.device), + (0, y_len), + value=True, + ) + + y_attn_mask = F.pad( + torch.triu( + torch.ones(y_len, y_len, dtype=torch.bool, device=x.device), + diagonal=1, + ), + (x_len, 0), + value=False, + ) + + xy_attn_mask = torch.concat([x_attn_mask, y_attn_mask], dim=0) + bsz, src_len = x.shape[0], x_len + y_len + _xy_padding_mask = ( + ar_xy_padding_mask.view(bsz, 1, 1, src_len) + .expand(-1, self.num_head, -1, -1) + .reshape(bsz * self.num_head, 1, src_len) + ) + xy_attn_mask = xy_attn_mask.logical_or(_xy_padding_mask) + new_attn_mask = torch.zeros_like(xy_attn_mask, dtype=x.dtype) + new_attn_mask.masked_fill_(xy_attn_mask, float("-inf")) + xy_attn_mask = new_attn_mask + # x 和完整的 y 一次性输入模型 + xy_pos = torch.concat([x, y_pos], dim=1) + + return xy_pos, xy_attn_mask, targets + def forward(self, x, x_lens, y, y_lens, bert_feature): """ x: phoneme_ids y: semantic_ids """ + + reject_y, reject_y_lens = make_reject_y(y, y_lens) + + xy_pos, xy_attn_mask, targets = self.make_input_data(x, x_lens, y, y_lens, bert_feature) + + xy_dec, _ = self.h( + (xy_pos, None), + mask=xy_attn_mask, + ) + x_len = x_lens.max() + logits = self.ar_predict_layer(xy_dec[:, x_len:]) + + ###### DPO ############# + reject_xy_pos, reject_xy_attn_mask, reject_targets = self.make_input_data(x, x_lens, reject_y, reject_y_lens, bert_feature) + + reject_xy_dec, _ = self.h( + (reject_xy_pos, None), + mask=reject_xy_attn_mask, + ) + x_len = x_lens.max() + reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len:]) + + # loss + # from feiteng: 每次 duration 越多, 梯度更新也应该更多, 所以用 sum + + loss_1 = F.cross_entropy(logits.permute(0, 2, 1), targets, reduction="sum") + acc = self.ar_accuracy_metric(logits.permute(0, 2, 1).detach(), targets).item() + + A_logits, R_logits = get_batch_logps(logits, reject_logits, targets, reject_targets) + loss_2, _, _ = dpo_loss(A_logits, R_logits, 0, 0, 0.2, reference_free=True) + + loss = loss_1 + loss_2 + + return loss, acc + + def forward_old(self, x, x_lens, y, y_lens, bert_feature): + """ + x: phoneme_ids + y: semantic_ids + """ x = self.ar_text_embedding(x) x = x + self.bert_proj(bert_feature.transpose(1, 2)) x = self.ar_text_position(x) @@ -231,6 +327,7 @@ class Text2SemanticDecoder(nn.Module): prompts, ####参考音频token bert_feature, top_k: int = -100, + top_p: int = 100, early_stop_num: int = -1, temperature: float = 1.0, ): @@ -305,7 +402,7 @@ class Text2SemanticDecoder(nn.Module): if(idx==0):###第一次跑不能EOS否则没有了 logits = logits[:, :-1] ###刨除1024终止符号的概率 samples = sample( - logits[0], y, top_k=top_k, top_p=1.0, repetition_penalty=1.35 + logits[0], y, top_k=top_k, top_p=top_p, repetition_penalty=1.05, temperature=temperature )[0].unsqueeze(0) if early_stop_num != -1 and (y.shape[1] - prefix_len) > early_stop_num: print("use early stop num:", early_stop_num) diff --git a/GPT_SoVITS/AR/models/utils.py b/GPT_SoVITS/AR/models/utils.py index 25fe446..bc5f2d0 100644 --- a/GPT_SoVITS/AR/models/utils.py +++ b/GPT_SoVITS/AR/models/utils.py @@ -1,7 +1,7 @@ # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/model/utils.py\ import torch import torch.nn.functional as F - +from typing import Tuple def sequence_mask(length, max_length=None): if max_length is None: @@ -158,3 +158,70 @@ def sample( ) idx_next = multinomial_sample_one_no_sync(probs) return idx_next, probs + +def dpo_loss(policy_chosen_logps: torch.FloatTensor, + policy_rejected_logps: torch.FloatTensor, + reference_chosen_logps: torch.FloatTensor, + reference_rejected_logps: torch.FloatTensor, + beta: float, + reference_free: bool = False) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + pi_logratios = policy_chosen_logps - policy_rejected_logps + ref_logratios = reference_chosen_logps - reference_rejected_logps + + if reference_free: + ref_logratios = 0 + + logits = pi_logratios - ref_logratios + + losses = -F.logsigmoid(beta * logits) + chosen_rewards = beta * (policy_chosen_logps - reference_chosen_logps).detach() + rejected_rewards = beta * (policy_rejected_logps - reference_rejected_logps).detach() + + return losses.mean(), chosen_rewards, rejected_rewards + +def get_batch_logps(logits_target: torch.FloatTensor, logits_reject: torch.FloatTensor, labels_target: torch.LongTensor, labels_reject: torch.LongTensor, average_log_prob: bool = False) -> Tuple[torch.FloatTensor, torch.FloatTensor]: + + # dummy token; we'll ignore the losses on these tokens later + + per_token_logps_target = torch.gather(logits_target.log_softmax(-1), dim=2, index=labels_target.unsqueeze(2)).squeeze(2) + per_token_logps_reject = torch.gather(logits_reject.log_softmax(-1), dim=2, index=labels_reject.unsqueeze(2)).squeeze(2) + + return per_token_logps_target.sum(-1), per_token_logps_reject.sum(-1) + +def make_reject_y(y_o, y_lens): + def repeat_P(y): + range_idx, _ = torch.randint(0, len(y), size=(2,)).sort() + pre = y[:range_idx[0]] + shf = y[range_idx[1]:] + range_text = y[range_idx[0]:range_idx[1]] + new_y = torch.cat([pre, range_text, range_text, shf]) + return new_y + def lost_P(y): + range_idx, _ = torch.randint(0, len(y), size=(2,)).sort() + pre = y[:range_idx[0]] + shf = y[range_idx[1]:] + range_text = y[range_idx[0]:range_idx[1]] + new_y = torch.cat([pre, shf]) + return new_y + bs = len(y_lens) + reject_y = [] + reject_y_lens = [] + for b in range(bs): + process_item_idx = torch.randint(0, 1, size=(1, ))[0] + if process_item_idx == 0: + new_y = repeat_P(y_o[b]) + reject_y.append(new_y) + reject_y_lens.append(len(new_y)) + elif process_item_idx==1: + new_y = lost_P(y_o[b]) + reject_y.append(new_y) + reject_y_lens.append(len(new_y)) + max_length = max(reject_y_lens) + for b in range(bs): + pad_length = max_length - reject_y_lens[b] + reject_y[b] = torch.cat([reject_y[b], torch.zeros(pad_length, dtype=y_o.dtype, device=y_o.device)], dim=0) + + reject_y = torch.stack(reject_y, dim = 0) + reject_y_lens = torch.tensor(reject_y_lens, device=y_lens.device) + + return reject_y, reject_y_lens diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index fc8af08..a85b611 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -359,7 +359,7 @@ def merge_short_text_in_array(texts, threshold): result[len(result) - 1] += text return result -def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切")): +def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6): t0 = ttime() prompt_language = dict_language[prompt_language] text_language = dict_language[text_language] @@ -438,7 +438,9 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, prompt, bert, # prompt_phone_len=ph_offset, - top_k=config["inference"]["top_k"], + top_k=top_k, + top_p=top_p, + temperature=temperature, early_stop_num=hz * max_sec, ) t3 = ttime() @@ -615,6 +617,10 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: value=i18n("凑四句一切"), interactive=True, ) + with gr.Row(): + top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=20,interactive=True) + top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=0.6,interactive=True) + temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=0.6,interactive=True) inference_button = gr.Button(i18n("合成语音"), variant="primary") output = gr.Audio(label=i18n("输出的语音")) From 0c8832727e704b42ebf6483fc03cca0826c49567 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:47:30 +0800 Subject: [PATCH 04/18] Update inference_webui.py --- GPT_SoVITS/inference_webui.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 566985a..9c5197a 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -624,15 +624,15 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: interactive=True, ) with gr.Row(): - top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=20,interactive=True) - top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=0.6,interactive=True) - temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=0.6,interactive=True) + top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True) + top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True) + temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True) inference_button = gr.Button(i18n("合成语音"), variant="primary") output = gr.Audio(label=i18n("输出的语音")) inference_button.click( get_tts_wav, - [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut], + [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut,top_k,top_p,temperature], [output], ) From 3bf71d3790076042ea888fb07d91e0b518f68992 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:47:43 +0800 Subject: [PATCH 05/18] Update t2s_model.py --- GPT_SoVITS/AR/models/t2s_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py index d3e550d..aaeace9 100644 --- a/GPT_SoVITS/AR/models/t2s_model.py +++ b/GPT_SoVITS/AR/models/t2s_model.py @@ -402,7 +402,7 @@ class Text2SemanticDecoder(nn.Module): if(idx==0):###第一次跑不能EOS否则没有了 logits = logits[:, :-1] ###刨除1024终止符号的概率 samples = sample( - logits[0], y, top_k=top_k, top_p=top_p, repetition_penalty=1.05, temperature=temperature + logits[0], y, top_k=top_k, top_p=top_p, repetition_penalty=1.35, temperature=temperature )[0].unsqueeze(0) if early_stop_num != -1 and (y.shape[1] - prefix_len) > early_stop_num: print("use early stop num:", early_stop_num) From 2fa74ecb941db27d9015583a9be6962898d66730 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:00:31 +0800 Subject: [PATCH 06/18] Update fasterwhisper_asr.py --- tools/asr/fasterwhisper_asr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py index 7055711..5f49de7 100644 --- a/tools/asr/fasterwhisper_asr.py +++ b/tools/asr/fasterwhisper_asr.py @@ -1,5 +1,6 @@ import argparse import os +os.environ["HF_ENDPOINT"]="https://hf-mirror.com" import traceback import requests from glob import glob @@ -103,4 +104,4 @@ if __name__ == '__main__': model_size = cmd.model_size, language = cmd.language, precision = cmd.precision, - ) \ No newline at end of file + ) From d82f6bbb98ba725e6725dcee99b80ce71fb0bf28 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:03:40 +0800 Subject: [PATCH 07/18] Update funasr_asr.py --- tools/asr/funasr_asr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py index 106d7d1..6aa3038 100644 --- a/tools/asr/funasr_asr.py +++ b/tools/asr/funasr_asr.py @@ -43,7 +43,7 @@ def execute_asr(input_folder, output_folder, model_size, language): text = model.generate(input="%s/%s"%(input_folder, name))[0]["text"] output.append(f"{input_folder}/{name}|{output_file_name}|{language.upper()}|{text}") except: - return print(traceback.format_exc()) + print(traceback.format_exc()) output_folder = output_folder or "output/asr_opt" os.makedirs(output_folder, exist_ok=True) @@ -73,4 +73,4 @@ if __name__ == '__main__': output_folder = cmd.output_folder, model_size = cmd.model_size, language = cmd.language, - ) \ No newline at end of file + ) From f99a473f9ba10b2756361f7bb30cfae442b50d9f Mon Sep 17 00:00:00 2001 From: Yuan-Man <68322456+Yuan-ManX@users.noreply.github.com> Date: Mon, 12 Feb 2024 23:09:42 +0800 Subject: [PATCH 08/18] Update fr_FR.json --- i18n/locale/fr_FR.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/i18n/locale/fr_FR.json b/i18n/locale/fr_FR.json index 4e0b963..99d83ac 100644 --- a/i18n/locale/fr_FR.json +++ b/i18n/locale/fr_FR.json @@ -8,8 +8,16 @@ "是否开启UVR5-WebUI": "Activer UVR5-WebUI", "UVR5进程输出信息": "Informations de processus UVR5", "0b-语音切分工具": "0b-Outil de découpage vocal", + ".list标注文件的路径": "Chemin du fichier d'annotation .list", + "GPT模型列表": "Liste des modèles GPT", + "SoVITS模型列表": "Liste des modèles SoVITS", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "Répertoire où sont enregistrés les fichiers audio après la découpe ! Chemin complet du fichier audio à lire = ce répertoire - nom du fichier correspondant à la forme d'onde dans le fichier liste (pas le chemin complet).", "音频自动切分输入路径,可文件可文件夹": "Chemin d'entrée automatique de découpage audio, peut être un fichier ou un dossier", "切分后的子音频的输出根目录": "Répertoire racine de sortie des sous-audios après découpage", + "怎么切": "Comment découper", + "不切": "Pas de découpe", + "凑四句一切": "Composez quatre phrases pour tout remplir", + "按英文句号.切": "Découpez par des points en anglais", "threshold:音量小于这个值视作静音的备选切割点": "seuil: le volume inférieur à cette valeur est considéré comme un point de coupe silencieux alternatif", "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: longueur minimale de chaque segment, si le premier segment est trop court, il est continué avec le segment suivant jusqu'à dépasser cette valeur", "min_interval:最短切割间隔": "min_interval: intervalle de coupe minimum", From e30d937c9a908bce195b4e8754bc041d8ba9f0f1 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 13 Feb 2024 00:26:39 +0800 Subject: [PATCH 09/18] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 74789cd..dee3fcc 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -107,8 +107,18 @@ 1-GPT训练卡死(win10 1909)和https://github.com/RVC-Boss/GPT-SoVITS/issues/232 (系统语言繁体)GPT训练报错,[尝试修复](https://github.com/RVC-Boss/GPT-SoVITS/commit/59f35adad85815df27e9c6b33d420f5ebfd8376b)。 +### 20240212更新 + +1-faster whisper和funasr逻辑优化。faster whisper转镜像站下载,规避huggingface连不上的问题。 + +2-DPO Loss实验性训练选项开启,通过构造负样本训练缓解GPT重复漏字问题。推理界面公开几个推理参数。 https://github.com/RVC-Boss/GPT-SoVITS/pull/457 + + todolist: 1-中文多音字推理优化 +2-训练支持中文实验名(原来会报错) + + From 3857c7d15b5771569dbf256a06c1d988f6bb9a71 Mon Sep 17 00:00:00 2001 From: Kenn Zhang Date: Tue, 13 Feb 2024 03:14:01 +0000 Subject: [PATCH 10/18] =?UTF-8?q?=E6=96=B0=E5=A2=9Eis=5Fshare=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E4=B9=9F=E4=BB=8E=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F?= =?UTF-8?q?=E4=B8=AD=E8=8E=B7=E5=8F=96=EF=BC=9B=E6=9B=B4=E6=94=B9is=5Fhalf?= =?UTF-8?q?/is=5Fshare=E4=BB=8E=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F?= =?UTF-8?q?=E4=B8=AD=E8=8E=B7=E5=8F=96=E5=80=BC=E7=9A=84=E6=96=B9=E5=BC=8F?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index fef2fac..3e9e951 100644 --- a/config.py +++ b/config.py @@ -5,8 +5,10 @@ import torch # 推理用的指定模型 sovits_path = "" gpt_path = "" -is_half = eval(os.environ.get("is_half","True")) -is_share=False +is_half_str = os.environ.get("is_half", "True") +is_half = True if is_half_str.lower() == 'true' else False +is_share_str = os.environ.get("is_share","False") +is_share= True if is_share_str.lower() == 'true' else False cnhubert_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base" bert_path = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large" From c990387f7e0d44e62e8c1db66a6bd247927e0bc6 Mon Sep 17 00:00:00 2001 From: Kenn Zhang Date: Tue, 13 Feb 2024 03:33:44 +0000 Subject: [PATCH 11/18] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=8E=9F=E6=9C=89Docke?= =?UTF-8?q?rfile=E5=92=8C=E6=96=87=E6=A1=A3=E4=B8=AD=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E7=9A=84=E7=AB=AF=E5=8F=A3=E5=8F=B7=EF=BC=9BDockerfile?= =?UTF-8?q?=E4=B8=AD=E5=8F=AF=E4=BB=A5=E5=8E=BB=E6=8E=89=20VOLUME=20?= =?UTF-8?q?=E7=9A=84=E5=A3=B0=E6=98=8E=EF=BC=8C=E5=90=8C=E6=97=B6=E5=B0=86?= =?UTF-8?q?=E7=AB=AF=E5=8F=A3=E6=9A=B4=E9=9C=B2=E7=BC=A9=E5=87=8F=E6=88=90?= =?UTF-8?q?=E4=B8=80=E8=A1=8C=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 11 +---------- README.md | 2 +- docker-compose.yaml | 5 +++-- docs/cn/README.md | 2 +- docs/ja/README.md | 2 +- docs/ko/README.md | 2 +- 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1daaf6f..74e282c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,15 +40,6 @@ COPY . /workspace # Copy the rest of the application COPY . /workspace - -EXPOSE 9870 -EXPOSE 9871 -EXPOSE 9872 -EXPOSE 9873 -EXPOSE 9874 - -VOLUME /workspace/output -VOLUME /workspace/logs -VOLUME /workspace/SoVITS_weights +EXPOSE 9871 9872 9873 9874 9880 CMD ["python", "webui.py"] diff --git a/README.md b/README.md index c0eda03..72f3694 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ docker compose -f "docker-compose.yaml" up -d As above, modify the corresponding parameters based on your actual situation, then run the following command: ``` -docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx ``` ## Dataset Format diff --git a/docker-compose.yaml b/docker-compose.yaml index 874824e..aca8ab9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,10 +2,11 @@ version: '3.8' services: gpt-sovits: - image: breakstring/gpt-sovits:xxxxx # please change the image name and tag base your environment + image: breakstring/gpt-sovits:latest # please change the image name and tag base your environment. If the tag contains the word 'elite', such as "latest-elite", it indicates that the image does not include the necessary models such as GPT-SoVITS, UVR5, Damo ASR, etc. You will need to download them yourself and map them into the container. container_name: gpt-sovits-container environment: - is_half=False + - is_share=False volumes: - ./output:/workspace/output - ./logs:/workspace/logs @@ -13,7 +14,7 @@ services: - ./reference:/workspace/reference working_dir: /workspace ports: - - "9870:9870" + - "9880:9880" - "9871:9871" - "9872:9872" - "9873:9873" diff --git a/docs/cn/README.md b/docs/cn/README.md index 5c87dcf..8d3ca49 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -138,7 +138,7 @@ docker compose -f "docker-compose.yaml" up -d 同上,根据您自己的实际情况修改对应的参数,然后运行如下命令: ``` -docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx ``` ### 预训练模型 diff --git a/docs/ja/README.md b/docs/ja/README.md index 26074b9..aa300c8 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -138,7 +138,7 @@ docker compose -f "docker-compose.yaml" up -d 上記と同様に、実際の状況に基づいて対応するパラメータを変更し、次のコマンドを実行します: ```markdown -docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx ``` ### 事前訓練済みモデル diff --git a/docs/ko/README.md b/docs/ko/README.md index e348885..afcdd66 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -141,7 +141,7 @@ docker compose -f "docker-compose.yaml" up -d 위와 동일하게 실제 상황에 맞게 매개변수를 수정한 다음 다음 명령을 실행합니다: ``` -docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9870:9870 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx +docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx ``` ### 사전 훈련된 모델 From dd2c4d6d7121bf82d29d0f0e4d788f3b231997c8 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Thu, 15 Feb 2024 16:31:06 +0800 Subject: [PATCH 12/18] support chinese name as model(exp) name support chinese name as model(exp) name --- GPT_SoVITS/prepare_datasets/1-get-text.py | 14 +++++++------- GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/GPT_SoVITS/prepare_datasets/1-get-text.py b/GPT_SoVITS/prepare_datasets/1-get-text.py index 88c9d85..5873164 100644 --- a/GPT_SoVITS/prepare_datasets/1-get-text.py +++ b/GPT_SoVITS/prepare_datasets/1-get-text.py @@ -33,13 +33,13 @@ from time import time as ttime import shutil -def my_save(fea, path): #####fix issue: torch.save doesn't support chinese path - dir = os.path.dirname(path) - name = os.path.basename(path) - tmp_path = "%s/%s%s.pth" % (dir, ttime(), i_part) - torch.save(fea, tmp_path) - shutil.move(tmp_path, "%s/%s" % (dir, name)) - +def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path + dir=os.path.dirname(path) + name=os.path.basename(path) + # tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part) + tmp_path="%s%s.pth"%(ttime(),i_part) + torch.save(fea,tmp_path) + shutil.move(tmp_path,"%s/%s"%(dir,name)) txt_path = "%s/2-name2text-%s.txt" % (opt_dir, i_part) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index b8355dd..4bc4ad6 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -35,7 +35,8 @@ import shutil def my_save(fea,path):#####fix issue: torch.save doesn't support chinese path dir=os.path.dirname(path) name=os.path.basename(path) - tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part) + # tmp_path="%s/%s%s.pth"%(dir,ttime(),i_part) + tmp_path="%s%s.pth"%(ttime(),i_part) torch.save(fea,tmp_path) shutil.move(tmp_path,"%s/%s"%(dir,name)) From ccb9b08be3c58e102defcc94ff4fd609da9e27ee Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Thu, 15 Feb 2024 20:16:58 +0800 Subject: [PATCH 13/18] =?UTF-8?q?dpo=E6=94=B9=E5=AE=9E=E9=AA=8C=E6=80=A7?= =?UTF-8?q?=E5=8B=BE=E9=80=89=E8=80=8C=E9=9D=9E=E5=BF=85=E9=A1=BB=E3=80=82?= =?UTF-8?q?=E5=8B=BE=E9=80=89=E5=90=8Ebatch=20size=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E5=87=8F=E5=8D=8A=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dpo改实验性勾选而非必须。勾选后batch size自动减半。 --- webui.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/webui.py b/webui.py index fa40c3a..cff7cdb 100644 --- a/webui.py +++ b/webui.py @@ -266,7 +266,7 @@ def close1Ba(): return "已终止SoVITS训练",{"__type__":"update","visible":True},{"__type__":"update","visible":False} p_train_GPT=None -def open1Bb(batch_size,total_epoch,exp_name,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1): +def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers,pretrained_s1): global p_train_GPT if(p_train_GPT==None): with open("GPT_SoVITS/configs/s1longer.yaml")as f: @@ -283,6 +283,7 @@ def open1Bb(batch_size,total_epoch,exp_name,if_save_latest,if_save_every_weights data["train"]["save_every_n_epoch"]=save_every_epoch data["train"]["if_save_every_weights"]=if_save_every_weights data["train"]["if_save_latest"]=if_save_latest + data["train"]["if_dpo"]=if_dpo data["train"]["half_weights_save_dir"]=GPT_weight_root data["train"]["exp_name"]=exp_name data["train_semantic_path"]="%s/6-name2semantic.tsv"%s1_dir @@ -807,6 +808,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: with gr.Row(): batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True) total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True) + if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True) if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True) if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True) save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True) @@ -817,7 +819,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息")) button1Ba_open.click(open1Ba, [batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D], [info1Ba,button1Ba_open,button1Ba_close]) button1Ba_close.click(close1Ba, [], [info1Ba,button1Ba_open,button1Ba_close]) - button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close]) + button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_dpo,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close]) button1Bb_close.click(close1Bb, [], [info1Bb,button1Bb_open,button1Bb_close]) with gr.TabItem(i18n("1C-推理")): gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。")) From 9b5231a317f2940355fcf7e1f1d609b2031774cd Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Thu, 15 Feb 2024 20:17:33 +0800 Subject: [PATCH 14/18] =?UTF-8?q?dpo=E6=94=B9=E5=AE=9E=E9=AA=8C=E6=80=A7?= =?UTF-8?q?=E5=8B=BE=E9=80=89=E8=80=8C=E9=9D=9E=E5=BF=85=E9=A1=BB=E3=80=82?= =?UTF-8?q?=E5=8B=BE=E9=80=89=E5=90=8Ebatch=20size=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E5=87=8F=E5=8D=8A=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dpo改实验性勾选而非必须。勾选后batch size自动减半。 --- GPT_SoVITS/AR/data/data_module.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPT_SoVITS/AR/data/data_module.py b/GPT_SoVITS/AR/data/data_module.py index 037484a..54d4634 100644 --- a/GPT_SoVITS/AR/data/data_module.py +++ b/GPT_SoVITS/AR/data/data_module.py @@ -41,7 +41,8 @@ class Text2SemanticDataModule(LightningDataModule): # pad_val=self.config['data']['pad_val']) def train_dataloader(self): - batch_size = max(min(self.config["train"]["batch_size"],len(self._train_dataset)//4),1)#防止不保存 + batch_size=self.config["train"]["batch_size"]//2 if self.config["train"].get("if_dpo",False)==True else self.config["train"]["batch_size"] + batch_size = max(min(batch_size,len(self._train_dataset)//4),1)#防止不保存 sampler = DistributedBucketSampler(self._train_dataset, batch_size=batch_size) return DataLoader( self._train_dataset, From 895fde46e420040ed26aaf0c5b7e99359d9b199b Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Thu, 15 Feb 2024 20:17:44 +0800 Subject: [PATCH 15/18] =?UTF-8?q?dpo=E6=94=B9=E5=AE=9E=E9=AA=8C=E6=80=A7?= =?UTF-8?q?=E5=8B=BE=E9=80=89=E8=80=8C=E9=9D=9E=E5=BF=85=E9=A1=BB=E3=80=82?= =?UTF-8?q?=E5=8B=BE=E9=80=89=E5=90=8Ebatch=20size=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E5=87=8F=E5=8D=8A=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dpo改实验性勾选而非必须。勾选后batch size自动减半。 --- GPT_SoVITS/AR/models/t2s_lightning_module.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPT_SoVITS/AR/models/t2s_lightning_module.py b/GPT_SoVITS/AR/models/t2s_lightning_module.py index f9dfc64..594b73b 100644 --- a/GPT_SoVITS/AR/models/t2s_lightning_module.py +++ b/GPT_SoVITS/AR/models/t2s_lightning_module.py @@ -11,7 +11,6 @@ from AR.models.t2s_model import Text2SemanticDecoder from AR.modules.lr_schedulers import WarmupCosineLRSchedule from AR.modules.optim import ScaledAdam - class Text2SemanticLightningModule(LightningModule): def __init__(self, config, output_dir, is_train=True): super().__init__() @@ -35,7 +34,8 @@ class Text2SemanticLightningModule(LightningModule): def training_step(self, batch: Dict, batch_idx: int): opt = self.optimizers() scheduler = self.lr_schedulers() - loss, acc = self.model.forward( + forward=self.model.forward if self.config["train"].get("if_dpo",False)==True else self.model.forward_old + loss, acc = forward( batch["phoneme_ids"], batch["phoneme_ids_len"], batch["semantic_ids"], From d00f816f7730aa45f8146d26d96486e4e0408b3c Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Thu, 15 Feb 2024 20:19:41 +0800 Subject: [PATCH 16/18] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index dee3fcc..4a64618 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -113,12 +113,15 @@ 2-DPO Loss实验性训练选项开启,通过构造负样本训练缓解GPT重复漏字问题。推理界面公开几个推理参数。 https://github.com/RVC-Boss/GPT-SoVITS/pull/457 +### 20240214更新 + +1-训练支持中文实验名(原来会报错) + +2-DPO训练改为可勾选选项而非必须。如勾选batch size自动减半。修复推理界面新参数不传参的问题。 todolist: 1-中文多音字推理优化 -2-训练支持中文实验名(原来会报错) - From 7b0c3c676495c64b2064aa472bff14b5c06206a5 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:57:40 +0800 Subject: [PATCH 17/18] fix issue475 fix https://github.com/RVC-Boss/GPT-SoVITS/issues/475 From 7fc216107baadebc6fd5ae4d9c6ad9e0f8bb5f7c Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Fri, 16 Feb 2024 00:07:35 +0800 Subject: [PATCH 18/18] Add files via upload --- GPT_SoVITS/text/tone_sandhi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPT_SoVITS/text/tone_sandhi.py b/GPT_SoVITS/text/tone_sandhi.py index 9f62abe..c557dad 100644 --- a/GPT_SoVITS/text/tone_sandhi.py +++ b/GPT_SoVITS/text/tone_sandhi.py @@ -672,6 +672,7 @@ class ToneSandhi: and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][1] == "v" + and seg[i + 1][1] == "v" ): new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0] else: