mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-09 00:10:00 +08:00
恢复先前缩进
This commit is contained in:
parent
adb7f71b64
commit
3bfb20763d
@ -280,6 +280,7 @@ class TTS:
|
|||||||
if self.configs.is_half and str(self.configs.device)!="cpu":
|
if self.configs.is_half and str(self.configs.device)!="cpu":
|
||||||
self.vits_model = self.vits_model.half()
|
self.vits_model = self.vits_model.half()
|
||||||
|
|
||||||
|
|
||||||
def init_t2s_weights(self, weights_path: str):
|
def init_t2s_weights(self, weights_path: str):
|
||||||
print(f"Loading Text2Semantic weights from {weights_path}")
|
print(f"Loading Text2Semantic weights from {weights_path}")
|
||||||
self.configs.t2s_weights_path = weights_path
|
self.configs.t2s_weights_path = weights_path
|
||||||
@ -376,6 +377,7 @@ class TTS:
|
|||||||
# self.refer_spec = spec
|
# self.refer_spec = spec
|
||||||
self.prompt_cache["refer_spec"] = spec
|
self.prompt_cache["refer_spec"] = spec
|
||||||
|
|
||||||
|
|
||||||
def _set_prompt_semantic(self, ref_wav_path:str):
|
def _set_prompt_semantic(self, ref_wav_path:str):
|
||||||
zero_wav = np.zeros(
|
zero_wav = np.zeros(
|
||||||
int(self.configs.sampling_rate * 0.3),
|
int(self.configs.sampling_rate * 0.3),
|
||||||
@ -416,8 +418,7 @@ class TTS:
|
|||||||
max_length = max(seq_lengths)
|
max_length = max(seq_lengths)
|
||||||
else:
|
else:
|
||||||
max_length = max(seq_lengths) if max_length < max(seq_lengths) else max_length
|
max_length = max(seq_lengths) if max_length < max(seq_lengths) else max_length
|
||||||
# 我爱套 torch.no_grad()
|
|
||||||
# with torch.no_grad():
|
|
||||||
padded_sequences = []
|
padded_sequences = []
|
||||||
for seq, length in zip(sequences, seq_lengths):
|
for seq, length in zip(sequences, seq_lengths):
|
||||||
padding = [0] * axis + [0, max_length - length] + [0] * (ndim - axis - 1)
|
padding = [0] * axis + [0, max_length - length] + [0] * (ndim - axis - 1)
|
||||||
@ -434,7 +435,6 @@ class TTS:
|
|||||||
device:torch.device=torch.device("cpu"),
|
device:torch.device=torch.device("cpu"),
|
||||||
precision:torch.dtype=torch.float32,
|
precision:torch.dtype=torch.float32,
|
||||||
):
|
):
|
||||||
|
|
||||||
# 但是这里不能套,反而会负优化
|
# 但是这里不能套,反而会负优化
|
||||||
# with torch.no_grad():
|
# with torch.no_grad():
|
||||||
_data:list = []
|
_data:list = []
|
||||||
@ -472,6 +472,7 @@ class TTS:
|
|||||||
batch_index_list.append([])
|
batch_index_list.append([])
|
||||||
batch_index_list[-1].append(i)
|
batch_index_list[-1].append(i)
|
||||||
|
|
||||||
|
|
||||||
for batch_idx, index_list in enumerate(batch_index_list):
|
for batch_idx, index_list in enumerate(batch_index_list):
|
||||||
item_list = [data[idx] for idx in index_list]
|
item_list = [data[idx] for idx in index_list]
|
||||||
phones_list = []
|
phones_list = []
|
||||||
@ -513,6 +514,7 @@ class TTS:
|
|||||||
all_phones_batch = all_phones_list
|
all_phones_batch = all_phones_list
|
||||||
all_bert_features_batch = all_bert_features_list
|
all_bert_features_batch = all_bert_features_list
|
||||||
|
|
||||||
|
|
||||||
# max_len = max(bert_max_len, phones_max_len)
|
# max_len = max(bert_max_len, phones_max_len)
|
||||||
# phones_batch = self.batch_sequences(phones_list, axis=0, pad_value=0, max_length=max_len)
|
# phones_batch = self.batch_sequences(phones_list, axis=0, pad_value=0, max_length=max_len)
|
||||||
#### 直接对phones和bert_features进行pad,会增大复读概率。
|
#### 直接对phones和bert_features进行pad,会增大复读概率。
|
||||||
@ -567,6 +569,7 @@ class TTS:
|
|||||||
'''
|
'''
|
||||||
self.stop_flag = True
|
self.stop_flag = True
|
||||||
|
|
||||||
|
|
||||||
def run(self, inputs:dict):
|
def run(self, inputs:dict):
|
||||||
"""
|
"""
|
||||||
Text to speech inference.
|
Text to speech inference.
|
||||||
@ -881,12 +884,14 @@ class TTS:
|
|||||||
audio_fragment:torch.Tensor = torch.cat([audio_fragment, zero_wav], dim=0)
|
audio_fragment:torch.Tensor = torch.cat([audio_fragment, zero_wav], dim=0)
|
||||||
audio[i][j] = audio_fragment.cpu().numpy()
|
audio[i][j] = audio_fragment.cpu().numpy()
|
||||||
|
|
||||||
|
|
||||||
if split_bucket:
|
if split_bucket:
|
||||||
audio = self.recovery_order(audio, batch_index_list)
|
audio = self.recovery_order(audio, batch_index_list)
|
||||||
else:
|
else:
|
||||||
# audio = [item for batch in audio for item in batch]
|
# audio = [item for batch in audio for item in batch]
|
||||||
audio = sum(audio, [])
|
audio = sum(audio, [])
|
||||||
|
|
||||||
|
|
||||||
audio = np.concatenate(audio, 0)
|
audio = np.concatenate(audio, 0)
|
||||||
audio = (audio * 32768).astype(np.int16)
|
audio = (audio * 32768).astype(np.int16)
|
||||||
|
|
||||||
@ -899,6 +904,8 @@ class TTS:
|
|||||||
return sr, audio
|
return sr, audio
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def speed_change(input_audio:np.ndarray, speed:float, sr:int):
|
def speed_change(input_audio:np.ndarray, speed:float, sr:int):
|
||||||
# 将 NumPy 数组转换为原始 PCM 流
|
# 将 NumPy 数组转换为原始 PCM 流
|
||||||
raw_audio = input_audio.astype(np.int16).tobytes()
|
raw_audio = input_audio.astype(np.int16).tobytes()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user