From 93dd8334f4ce7fb5ccdeabebe05deb26a3cf30fb Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:02:18 +0800 Subject: [PATCH] Update api.py --- api.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/api.py b/api.py index 376b0bc..725b12d 100644 --- a/api.py +++ b/api.py @@ -192,13 +192,18 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) t0 = ttime() prompt_text = prompt_text.strip("\n") prompt_language, text = prompt_language, text.strip("\n") + zero_wav = np.zeros(int(hps.data.sampling_rate * 0.3), dtype=np.float16 if is_half == True else np.float32) with torch.no_grad(): - wav16k, sr = librosa.load(ref_wav_path, sr=16000) # 派蒙 + wav16k, sr = librosa.load(ref_wav_path, sr=16000) wav16k = torch.from_numpy(wav16k) + zero_wav_torch = torch.from_numpy(zero_wav) if (is_half == True): wav16k = wav16k.half().to(device) + zero_wav_torch = zero_wav_torch.half().to(device) else: wav16k = wav16k.to(device) + zero_wav_torch = zero_wav_torch.to(device) + wav16k=torch.cat([wav16k,zero_wav_torch]) ssl_content = ssl_model.model(wav16k.unsqueeze(0))["last_hidden_state"].transpose(1, 2) # .float() codes = vq_model.extract_latent(ssl_content) prompt_semantic = codes[0, 0] @@ -209,7 +214,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) phones1 = cleaned_text_to_sequence(phones1) texts = text.split("\n") audio_opt = [] - zero_wav = np.zeros(int(hps.data.sampling_rate * 0.3), dtype=np.float16 if is_half == True else np.float32) + for text in texts: phones2, word2ph2, norm_text2 = clean_text(text, text_language) phones2 = cleaned_text_to_sequence(phones2)