From a01b17c54f5f94a9861dc6d99056f8b43d75b815 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Wed, 20 Mar 2024 19:09:43 +0000
Subject: [PATCH 1/4] =?UTF-8?q?=E4=B8=80=E4=B8=AA=E5=A5=87=E5=A5=87?=
 =?UTF-8?q?=E6=80=AA=E6=80=AA=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

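Handle `infer_panel` returning `pred_semantic` as a list in `get_tts_wav`:
take the first element of `pred_semantic` and of `idx`, keep the last `idx`
entries, and unsqueeze twice. The previous tensor path (slice `[:, -idx:]`
followed by a single unsqueeze) is kept in the `else` branch.

Also adds an `import re` and a debug `print("-"*30)`; both are removed again
later in this series.
---
Review note: the new guard tests `isinstance(pred_semantic, list)` twice;
the second check was presumably meant for `idx` - please confirm.
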
 api.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/api.py b/api.py
index 34adfbe9..06350cf1 100644
--- a/api.py
+++ b/api.py
@@ -109,7 +109,7 @@ import sys
 now_dir = os.getcwd()
 sys.path.append(now_dir)
 sys.path.append("%s/GPT_SoVITS" % (now_dir))
-
+import re
 import signal
 from time import time as ttime
 import torch
@@ -402,6 +402,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
         t2 = ttime()
         with torch.no_grad():
             # pred_semantic = t2s_model.model.infer(
+            print("-"*30)
             pred_semantic, idx = t2s_model.model.infer_panel(
                 all_phoneme_ids,
                 all_phoneme_len,
@@ -411,8 +412,15 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
                 top_k=config['inference']['top_k'],
                 early_stop_num=hz * max_sec)
         t3 = ttime()
-        # print(pred_semantic.shape,idx)
-        pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)  # .unsqueeze(0)#mq要多unsqueeze一次
+        # print(pred_semantic[:,])
+        if isinstance(pred_semantic, list) and isinstance(pred_semantic, list):
+            pred_semantic = pred_semantic[0]
+            idx=idx[0]
+            pred_semantic = pred_semantic[-idx:]
+            pred_semantic = pred_semantic.unsqueeze(0).unsqueeze(0)
+        else:
+            pred_semantic = pred_semantic[:,-idx:]
+            pred_semantic = pred_semantic.unsqueeze(0)  # .unsqueeze(0)#mq要多unsqueeze一次
         refer = get_spepc(hps, ref_wav_path)  # .to(device)
         if (is_half == True):
             refer = refer.half().to(device)

From fa235308c99724515ea6a4f2968abdf1345e7014 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Wed, 20 Mar 2024 19:12:04 +0000
Subject: [PATCH 2/4] Update api.py

Remove the `import re` added in patch 1/4; it is not needed.
---
 api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api.py b/api.py
index 06350cf1..bba76116 100644
--- a/api.py
+++ b/api.py
@@ -109,7 +109,7 @@ import sys
 now_dir = os.getcwd()
 sys.path.append(now_dir)
 sys.path.append("%s/GPT_SoVITS" % (now_dir))
-import re
+
 import signal
 from time import time as ttime
 import torch

From e0082d77205b60056ccc6a542d59e18430d176ed Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Wed, 20 Mar 2024 19:44:23 +0000
Subject: [PATCH 3/4] Update api.py

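Remove the debug `print("-"*30)` that patch 1/4 added before the
`infer_panel` call in `get_tts_wav`.
---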
 api.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/api.py b/api.py
index bba76116..5b8120d7 100644
--- a/api.py
+++ b/api.py
@@ -402,7 +402,6 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
         t2 = ttime()
         with torch.no_grad():
             # pred_semantic = t2s_model.model.infer(
-            print("-"*30)
             pred_semantic, idx = t2s_model.model.infer_panel(
                 all_phoneme_ids,
                 all_phoneme_len,

From 24218b27b44dced7320238f4bf0c115f1ff3ebb8 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Wed, 20 Mar 2024 20:07:18 +0000
Subject: [PATCH 4/4] Update api.py

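Restore the original commented-out debug line
(`# print(pred_semantic.shape,idx)`) that patch 1/4 had replaced.
---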
 api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api.py b/api.py
index 5b8120d7..f94de103 100644
--- a/api.py
+++ b/api.py
@@ -411,7 +411,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language)
                 top_k=config['inference']['top_k'],
                 early_stop_num=hz * max_sec)
         t3 = ttime()
-        # print(pred_semantic[:,])
+        # print(pred_semantic.shape,idx)
         if isinstance(pred_semantic, list) and isinstance(pred_semantic, list):
             pred_semantic = pred_semantic[0]
             idx=idx[0]