mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-05 22:20:01 +08:00
适配v4版本
This commit is contained in:
parent
30fdb60295
commit
0ba0ebcf65
@ -106,11 +106,12 @@ def speed_change(input_audio: np.ndarray, speed: float, sr: int):
|
||||
resample_transform_dict = {}
|
||||
|
||||
|
||||
def resample(audio_tensor, sr0, device):
|
||||
def resample(audio_tensor, sr0, sr1, device):
|
||||
global resample_transform_dict
|
||||
if sr0 not in resample_transform_dict:
|
||||
resample_transform_dict[sr0] = torchaudio.transforms.Resample(sr0, 24000).to(device)
|
||||
return resample_transform_dict[sr0](audio_tensor)
|
||||
key="%s-%s"%(sr0,sr1)
|
||||
if key not in resample_transform_dict:
|
||||
resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
|
||||
return resample_transform_dict[key](audio_tensor)
|
||||
|
||||
|
||||
class DictToAttrRecursive(dict):
|
||||
@ -1372,9 +1373,10 @@ class TTS:
|
||||
if ref_audio.shape[0] == 2:
|
||||
ref_audio = ref_audio.mean(0).unsqueeze(0)
|
||||
|
||||
tgt_sr = self.vocoder_configs["sr"]
|
||||
# tgt_sr = self.vocoder_configs["sr"]
|
||||
tgt_sr = 24000 if self.configs.version == "v3" else 32000
|
||||
if ref_sr != tgt_sr:
|
||||
ref_audio = resample(ref_audio, ref_sr, self.configs.device)
|
||||
ref_audio = resample(ref_audio, ref_sr, tgt_sr, self.configs.device)
|
||||
|
||||
mel2 = mel_fn(ref_audio) if self.configs.version == "v3" else mel_fn_v4(ref_audio)
|
||||
mel2 = norm_spec(mel2)
|
||||
@ -1437,12 +1439,11 @@ class TTS:
|
||||
ref_audio = ref_audio.to(self.configs.device).float()
|
||||
if ref_audio.shape[0] == 2:
|
||||
ref_audio = ref_audio.mean(0).unsqueeze(0)
|
||||
if ref_sr != 24000:
|
||||
ref_audio = resample(ref_audio, ref_sr, self.configs.device)
|
||||
|
||||
tgt_sr = self.vocoder_configs["sr"]
|
||||
|
||||
# tgt_sr = self.vocoder_configs["sr"]
|
||||
tgt_sr = 24000 if self.configs.version == "v3" else 32000
|
||||
if ref_sr != tgt_sr:
|
||||
ref_audio = resample(ref_audio, ref_sr, self.configs.device)
|
||||
ref_audio = resample(ref_audio, ref_sr, tgt_sr, self.configs.device)
|
||||
|
||||
mel2 = mel_fn(ref_audio) if self.configs.version == "v3" else mel_fn_v4(ref_audio)
|
||||
mel2 = norm_spec(mel2)
|
||||
|
@ -3,9 +3,9 @@ custom:
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
device: cuda
|
||||
is_half: true
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt
|
||||
version: v2
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth
|
||||
t2s_weights_path: GPT_SoVITS/pretrained_models/s1v3.ckpt
|
||||
version: v3
|
||||
vits_weights_path: GPT_SoVITS/pretrained_models/s2Gv3.pth
|
||||
v1:
|
||||
bert_base_path: GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large
|
||||
cnhuhbert_base_path: GPT_SoVITS/pretrained_models/chinese-hubert-base
|
||||
|
@ -397,7 +397,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
||||
minimum=1, maximum=200, step=1, label=i18n("batch_size"), value=20, interactive=True
|
||||
)
|
||||
sample_steps = gr.Radio(
|
||||
label=i18n("采样步数(仅对V3生效)"), value=32, choices=[4, 8, 16, 32], visible=True
|
||||
label=i18n("采样步数(仅对V3/4生效)"), value=32, choices=[4, 8, 16, 32], visible=True
|
||||
)
|
||||
with gr.Row():
|
||||
fragment_interval = gr.Slider(
|
||||
|
Loading…
x
Reference in New Issue
Block a user