From a01b17c54f5f94a9861dc6d99056f8b43d75b815 Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:09:43 +0000 Subject: [PATCH 1/4] =?UTF-8?q?=E4=B8=80=E4=B8=AA=E5=A5=87=E5=A5=87?= =?UTF-8?q?=E6=80=AA=E6=80=AA=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/api.py b/api.py index 34adfbe9..06350cf1 100644 --- a/api.py +++ b/api.py @@ -109,7 +109,7 @@ import sys now_dir = os.getcwd() sys.path.append(now_dir) sys.path.append("%s/GPT_SoVITS" % (now_dir)) - +import re import signal from time import time as ttime import torch @@ -402,6 +402,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) t2 = ttime() with torch.no_grad(): # pred_semantic = t2s_model.model.infer( + print("-"*30) pred_semantic, idx = t2s_model.model.infer_panel( all_phoneme_ids, all_phoneme_len, @@ -411,8 +412,15 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) top_k=config['inference']['top_k'], early_stop_num=hz * max_sec) t3 = ttime() - # print(pred_semantic.shape,idx) - pred_semantic = pred_semantic[:, -idx:].unsqueeze(0) # .unsqueeze(0)#mq要多unsqueeze一次 + # print(pred_semantic[:,]) + if isinstance(pred_semantic, list) and isinstance(pred_semantic, list): + pred_semantic = pred_semantic[0] + idx=idx[0] + pred_semantic = pred_semantic[-idx:] + pred_semantic = pred_semantic.unsqueeze(0).unsqueeze(0) + else: + pred_semantic = pred_semantic[:,-idx:] + pred_semantic = pred_semantic.unsqueeze(0) # .unsqueeze(0)#mq要多unsqueeze一次 refer = get_spepc(hps, ref_wav_path) # .to(device) if (is_half == True): refer = refer.half().to(device) From fa235308c99724515ea6a4f2968abdf1345e7014 Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:12:04 +0000 Subject: [PATCH 2/4] Update api.py delete useless code --- api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api.py b/api.py index 06350cf1..bba76116 100644 --- a/api.py +++ b/api.py @@ -109,7 +109,7 @@ import sys now_dir = os.getcwd() sys.path.append(now_dir) sys.path.append("%s/GPT_SoVITS" % (now_dir)) -import re + import signal from time import time as ttime import torch From e0082d77205b60056ccc6a542d59e18430d176ed Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:44:23 +0000 Subject: [PATCH 3/4] Update api.py --- api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/api.py b/api.py index bba76116..5b8120d7 100644 --- a/api.py +++ b/api.py @@ -402,7 +402,6 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) t2 = ttime() with torch.no_grad(): # pred_semantic = t2s_model.model.infer( - print("-"*30) pred_semantic, idx = t2s_model.model.infer_panel( all_phoneme_ids, all_phoneme_len, From 24218b27b44dced7320238f4bf0c115f1ff3ebb8 Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Wed, 20 Mar 2024 20:07:18 +0000 Subject: [PATCH 4/4] Update api.py --- api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api.py b/api.py index 5b8120d7..f94de103 100644 --- a/api.py +++ b/api.py @@ -411,7 +411,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language) top_k=config['inference']['top_k'], early_stop_num=hz * max_sec) t3 = ttime() - # print(pred_semantic[:,]) + # print(pred_semantic.shape,idx) if isinstance(pred_semantic, list) and isinstance(pred_semantic, list): pred_semantic = pred_semantic[0] idx=idx[0]