mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-08 16:00:01 +08:00
恢复先前缩进
This commit is contained in:
parent
adb7f71b64
commit
3bfb20763d
@ -272,7 +272,7 @@ class TTS:
|
|||||||
# if ("pretrained" not in weights_path):
|
# if ("pretrained" not in weights_path):
|
||||||
if hasattr(vits_model, "enc_q"):
|
if hasattr(vits_model, "enc_q"):
|
||||||
del vits_model.enc_q
|
del vits_model.enc_q
|
||||||
|
|
||||||
vits_model = vits_model.to(self.configs.device)
|
vits_model = vits_model.to(self.configs.device)
|
||||||
vits_model = vits_model.eval()
|
vits_model = vits_model.eval()
|
||||||
vits_model.load_state_dict(dict_s2["weight"], strict=False)
|
vits_model.load_state_dict(dict_s2["weight"], strict=False)
|
||||||
@ -280,6 +280,7 @@ class TTS:
|
|||||||
if self.configs.is_half and str(self.configs.device)!="cpu":
|
if self.configs.is_half and str(self.configs.device)!="cpu":
|
||||||
self.vits_model = self.vits_model.half()
|
self.vits_model = self.vits_model.half()
|
||||||
|
|
||||||
|
|
||||||
def init_t2s_weights(self, weights_path: str):
|
def init_t2s_weights(self, weights_path: str):
|
||||||
print(f"Loading Text2Semantic weights from {weights_path}")
|
print(f"Loading Text2Semantic weights from {weights_path}")
|
||||||
self.configs.t2s_weights_path = weights_path
|
self.configs.t2s_weights_path = weights_path
|
||||||
@ -296,7 +297,7 @@ class TTS:
|
|||||||
self.t2s_model = t2s_model
|
self.t2s_model = t2s_model
|
||||||
if self.configs.is_half and str(self.configs.device)!="cpu":
|
if self.configs.is_half and str(self.configs.device)!="cpu":
|
||||||
self.t2s_model = self.t2s_model.half()
|
self.t2s_model = self.t2s_model.half()
|
||||||
|
|
||||||
def enable_half_precision(self, enable: bool = True):
|
def enable_half_precision(self, enable: bool = True):
|
||||||
'''
|
'''
|
||||||
To enable half precision for the TTS model.
|
To enable half precision for the TTS model.
|
||||||
@ -307,7 +308,7 @@ class TTS:
|
|||||||
if str(self.configs.device) == "cpu" and enable:
|
if str(self.configs.device) == "cpu" and enable:
|
||||||
print("Half precision is not supported on CPU.")
|
print("Half precision is not supported on CPU.")
|
||||||
return
|
return
|
||||||
|
|
||||||
self.configs.is_half = enable
|
self.configs.is_half = enable
|
||||||
self.precision = torch.float16 if enable else torch.float32
|
self.precision = torch.float16 if enable else torch.float32
|
||||||
self.configs.save_configs()
|
self.configs.save_configs()
|
||||||
@ -329,7 +330,7 @@ class TTS:
|
|||||||
self.bert_model = self.bert_model.float()
|
self.bert_model = self.bert_model.float()
|
||||||
if self.cnhuhbert_model is not None:
|
if self.cnhuhbert_model is not None:
|
||||||
self.cnhuhbert_model = self.cnhuhbert_model.float()
|
self.cnhuhbert_model = self.cnhuhbert_model.float()
|
||||||
|
|
||||||
def set_device(self, device: torch.device):
|
def set_device(self, device: torch.device):
|
||||||
'''
|
'''
|
||||||
To set the device for all models.
|
To set the device for all models.
|
||||||
@ -346,7 +347,7 @@ class TTS:
|
|||||||
self.bert_model = self.bert_model.to(device)
|
self.bert_model = self.bert_model.to(device)
|
||||||
if self.cnhuhbert_model is not None:
|
if self.cnhuhbert_model is not None:
|
||||||
self.cnhuhbert_model = self.cnhuhbert_model.to(device)
|
self.cnhuhbert_model = self.cnhuhbert_model.to(device)
|
||||||
|
|
||||||
def set_ref_audio(self, ref_audio_path:str):
|
def set_ref_audio(self, ref_audio_path:str):
|
||||||
'''
|
'''
|
||||||
To set the reference audio for the TTS model,
|
To set the reference audio for the TTS model,
|
||||||
@ -356,7 +357,7 @@ class TTS:
|
|||||||
'''
|
'''
|
||||||
self._set_prompt_semantic(ref_audio_path)
|
self._set_prompt_semantic(ref_audio_path)
|
||||||
self._set_ref_spec(ref_audio_path)
|
self._set_ref_spec(ref_audio_path)
|
||||||
|
|
||||||
def _set_ref_spec(self, ref_audio_path):
|
def _set_ref_spec(self, ref_audio_path):
|
||||||
audio = load_audio(ref_audio_path, int(self.configs.sampling_rate))
|
audio = load_audio(ref_audio_path, int(self.configs.sampling_rate))
|
||||||
audio = torch.FloatTensor(audio)
|
audio = torch.FloatTensor(audio)
|
||||||
@ -375,7 +376,8 @@ class TTS:
|
|||||||
spec = spec.half()
|
spec = spec.half()
|
||||||
# self.refer_spec = spec
|
# self.refer_spec = spec
|
||||||
self.prompt_cache["refer_spec"] = spec
|
self.prompt_cache["refer_spec"] = spec
|
||||||
|
|
||||||
|
|
||||||
def _set_prompt_semantic(self, ref_wav_path:str):
|
def _set_prompt_semantic(self, ref_wav_path:str):
|
||||||
zero_wav = np.zeros(
|
zero_wav = np.zeros(
|
||||||
int(self.configs.sampling_rate * 0.3),
|
int(self.configs.sampling_rate * 0.3),
|
||||||
@ -400,10 +402,10 @@ class TTS:
|
|||||||
1, 2
|
1, 2
|
||||||
) # .float()
|
) # .float()
|
||||||
codes = self.vits_model.extract_latent(hubert_feature)
|
codes = self.vits_model.extract_latent(hubert_feature)
|
||||||
|
|
||||||
prompt_semantic = codes[0, 0].to(self.configs.device)
|
prompt_semantic = codes[0, 0].to(self.configs.device)
|
||||||
self.prompt_cache["prompt_semantic"] = prompt_semantic
|
self.prompt_cache["prompt_semantic"] = prompt_semantic
|
||||||
|
|
||||||
def batch_sequences(self, sequences: List[torch.Tensor], axis: int = 0, pad_value: int = 0, max_length:int=None):
|
def batch_sequences(self, sequences: List[torch.Tensor], axis: int = 0, pad_value: int = 0, max_length:int=None):
|
||||||
seq = sequences[0]
|
seq = sequences[0]
|
||||||
ndim = seq.dim()
|
ndim = seq.dim()
|
||||||
@ -416,8 +418,7 @@ class TTS:
|
|||||||
max_length = max(seq_lengths)
|
max_length = max(seq_lengths)
|
||||||
else:
|
else:
|
||||||
max_length = max(seq_lengths) if max_length < max(seq_lengths) else max_length
|
max_length = max(seq_lengths) if max_length < max(seq_lengths) else max_length
|
||||||
# 我爱套 torch.no_grad()
|
|
||||||
# with torch.no_grad():
|
|
||||||
padded_sequences = []
|
padded_sequences = []
|
||||||
for seq, length in zip(sequences, seq_lengths):
|
for seq, length in zip(sequences, seq_lengths):
|
||||||
padding = [0] * axis + [0, max_length - length] + [0] * (ndim - axis - 1)
|
padding = [0] * axis + [0, max_length - length] + [0] * (ndim - axis - 1)
|
||||||
@ -425,7 +426,7 @@ class TTS:
|
|||||||
padded_sequences.append(padded_seq)
|
padded_sequences.append(padded_seq)
|
||||||
batch = torch.stack(padded_sequences)
|
batch = torch.stack(padded_sequences)
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
def to_batch(self, data:list,
|
def to_batch(self, data:list,
|
||||||
prompt_data:dict=None,
|
prompt_data:dict=None,
|
||||||
batch_size:int=5,
|
batch_size:int=5,
|
||||||
@ -434,115 +435,116 @@ class TTS:
|
|||||||
device:torch.device=torch.device("cpu"),
|
device:torch.device=torch.device("cpu"),
|
||||||
precision:torch.dtype=torch.float32,
|
precision:torch.dtype=torch.float32,
|
||||||
):
|
):
|
||||||
|
|
||||||
# 但是这里不能套,反而会负优化
|
# 但是这里不能套,反而会负优化
|
||||||
# with torch.no_grad():
|
# with torch.no_grad():
|
||||||
_data:list = []
|
_data:list = []
|
||||||
index_and_len_list = []
|
index_and_len_list = []
|
||||||
for idx, item in enumerate(data):
|
for idx, item in enumerate(data):
|
||||||
norm_text_len = len(item["norm_text"])
|
norm_text_len = len(item["norm_text"])
|
||||||
index_and_len_list.append([idx, norm_text_len])
|
index_and_len_list.append([idx, norm_text_len])
|
||||||
|
|
||||||
batch_index_list = []
|
batch_index_list = []
|
||||||
if split_bucket:
|
if split_bucket:
|
||||||
index_and_len_list.sort(key=lambda x: x[1])
|
index_and_len_list.sort(key=lambda x: x[1])
|
||||||
index_and_len_list = np.array(index_and_len_list, dtype=np.int64)
|
index_and_len_list = np.array(index_and_len_list, dtype=np.int64)
|
||||||
|
|
||||||
|
batch_index_list_len = 0
|
||||||
|
pos = 0
|
||||||
|
while pos <index_and_len_list.shape[0]:
|
||||||
|
# batch_index_list.append(index_and_len_list[pos:min(pos+batch_size,len(index_and_len_list))])
|
||||||
|
pos_end = min(pos+batch_size,index_and_len_list.shape[0])
|
||||||
|
while pos < pos_end:
|
||||||
|
batch=index_and_len_list[pos:pos_end, 1].astype(np.float32)
|
||||||
|
score=batch[(pos_end-pos)//2]/(batch.mean()+1e-8)
|
||||||
|
if (score>=threshold) or (pos_end-pos==1):
|
||||||
|
batch_index=index_and_len_list[pos:pos_end, 0].tolist()
|
||||||
|
batch_index_list_len += len(batch_index)
|
||||||
|
batch_index_list.append(batch_index)
|
||||||
|
pos = pos_end
|
||||||
|
break
|
||||||
|
pos_end=pos_end-1
|
||||||
|
|
||||||
|
assert batch_index_list_len == len(data)
|
||||||
|
|
||||||
|
else:
|
||||||
|
for i in range(len(data)):
|
||||||
|
if i%batch_size == 0:
|
||||||
|
batch_index_list.append([])
|
||||||
|
batch_index_list[-1].append(i)
|
||||||
|
|
||||||
batch_index_list_len = 0
|
|
||||||
pos = 0
|
for batch_idx, index_list in enumerate(batch_index_list):
|
||||||
while pos <index_and_len_list.shape[0]:
|
item_list = [data[idx] for idx in index_list]
|
||||||
# batch_index_list.append(index_and_len_list[pos:min(pos+batch_size,len(index_and_len_list))])
|
phones_list = []
|
||||||
pos_end = min(pos+batch_size,index_and_len_list.shape[0])
|
phones_len_list = []
|
||||||
while pos < pos_end:
|
# bert_features_list = []
|
||||||
batch=index_and_len_list[pos:pos_end, 1].astype(np.float32)
|
all_phones_list = []
|
||||||
score=batch[(pos_end-pos)//2]/(batch.mean()+1e-8)
|
all_phones_len_list = []
|
||||||
if (score>=threshold) or (pos_end-pos==1):
|
all_bert_features_list = []
|
||||||
batch_index=index_and_len_list[pos:pos_end, 0].tolist()
|
norm_text_batch = []
|
||||||
batch_index_list_len += len(batch_index)
|
bert_max_len = 0
|
||||||
batch_index_list.append(batch_index)
|
phones_max_len = 0
|
||||||
pos = pos_end
|
# 但是这里也不能套,反而会负优化
|
||||||
break
|
# with torch.no_grad():
|
||||||
pos_end=pos_end-1
|
for item in item_list:
|
||||||
|
if prompt_data is not None:
|
||||||
assert batch_index_list_len == len(data)
|
all_bert_features = torch.cat([prompt_data["bert_features"], item["bert_features"]], 1)\
|
||||||
|
|
||||||
else:
|
|
||||||
for i in range(len(data)):
|
|
||||||
if i%batch_size == 0:
|
|
||||||
batch_index_list.append([])
|
|
||||||
batch_index_list[-1].append(i)
|
|
||||||
|
|
||||||
for batch_idx, index_list in enumerate(batch_index_list):
|
|
||||||
item_list = [data[idx] for idx in index_list]
|
|
||||||
phones_list = []
|
|
||||||
phones_len_list = []
|
|
||||||
# bert_features_list = []
|
|
||||||
all_phones_list = []
|
|
||||||
all_phones_len_list = []
|
|
||||||
all_bert_features_list = []
|
|
||||||
norm_text_batch = []
|
|
||||||
bert_max_len = 0
|
|
||||||
phones_max_len = 0
|
|
||||||
# 但是这里也不能套,反而会负优化
|
|
||||||
# with torch.no_grad():
|
|
||||||
for item in item_list:
|
|
||||||
if prompt_data is not None:
|
|
||||||
all_bert_features = torch.cat([prompt_data["bert_features"], item["bert_features"]], 1)\
|
|
||||||
.to(dtype=precision, device=device)
|
|
||||||
all_phones = torch.LongTensor(prompt_data["phones"]+item["phones"]).to(device)
|
|
||||||
phones = torch.LongTensor(item["phones"]).to(device)
|
|
||||||
# norm_text = prompt_data["norm_text"]+item["norm_text"]
|
|
||||||
else:
|
|
||||||
all_bert_features = item["bert_features"]\
|
|
||||||
.to(dtype=precision, device=device)
|
.to(dtype=precision, device=device)
|
||||||
phones = torch.LongTensor(item["phones"]).to(device)
|
all_phones = torch.LongTensor(prompt_data["phones"]+item["phones"]).to(device)
|
||||||
all_phones = phones
|
phones = torch.LongTensor(item["phones"]).to(device)
|
||||||
# norm_text = item["norm_text"]
|
# norm_text = prompt_data["norm_text"]+item["norm_text"]
|
||||||
|
else:
|
||||||
bert_max_len = max(bert_max_len, all_bert_features.shape[-1])
|
all_bert_features = item["bert_features"]\
|
||||||
phones_max_len = max(phones_max_len, phones.shape[-1])
|
.to(dtype=precision, device=device)
|
||||||
|
phones = torch.LongTensor(item["phones"]).to(device)
|
||||||
phones_list.append(phones)
|
all_phones = phones
|
||||||
phones_len_list.append(phones.shape[-1])
|
# norm_text = item["norm_text"]
|
||||||
all_phones_list.append(all_phones)
|
|
||||||
all_phones_len_list.append(all_phones.shape[-1])
|
|
||||||
all_bert_features_list.append(all_bert_features)
|
|
||||||
norm_text_batch.append(item["norm_text"])
|
|
||||||
|
|
||||||
phones_batch = phones_list
|
|
||||||
all_phones_batch = all_phones_list
|
|
||||||
all_bert_features_batch = all_bert_features_list
|
|
||||||
|
|
||||||
# max_len = max(bert_max_len, phones_max_len)
|
|
||||||
# phones_batch = self.batch_sequences(phones_list, axis=0, pad_value=0, max_length=max_len)
|
|
||||||
#### 直接对phones和bert_features进行pad,会增大复读概率。
|
|
||||||
# all_phones_batch = self.batch_sequences(all_phones_list, axis=0, pad_value=0, max_length=max_len)
|
|
||||||
# all_bert_features_batch = all_bert_features_list
|
|
||||||
# all_bert_features_batch = torch.zeros(len(item_list), 1024, max_len, dtype=precision, device=device)
|
|
||||||
# for idx, item in enumerate(all_bert_features_list):
|
|
||||||
# all_bert_features_batch[idx, :, : item.shape[-1]] = item
|
|
||||||
|
|
||||||
# #### 先对phones进行embedding、对bert_features进行project,再pad到相同长度,以缓解复读问题。(可能还有其他因素导致复读)
|
|
||||||
# all_phones_list = [self.t2s_model.model.ar_text_embedding(item.to(self.t2s_model.device)) for item in all_phones_list]
|
|
||||||
# all_phones_list = [F.pad(item,(0,0,0,max_len-item.shape[0]),value=0) for item in all_phones_list]
|
|
||||||
# all_phones_batch = torch.stack(all_phones_list, dim=0)
|
|
||||||
|
|
||||||
# all_bert_features_list = [self.t2s_model.model.bert_proj(item.to(self.t2s_model.device).transpose(0, 1)) for item in all_bert_features_list]
|
|
||||||
# all_bert_features_list = [F.pad(item,(0,0,0,max_len-item.shape[0]), value=0) for item in all_bert_features_list]
|
|
||||||
# all_bert_features_batch = torch.stack(all_bert_features_list, dim=0)
|
|
||||||
|
|
||||||
batch = {
|
|
||||||
"phones": phones_batch,
|
|
||||||
"phones_len": torch.LongTensor(phones_len_list).to(device),
|
|
||||||
"all_phones": all_phones_batch,
|
|
||||||
"all_phones_len": torch.LongTensor(all_phones_len_list).to(device),
|
|
||||||
"all_bert_features": all_bert_features_batch,
|
|
||||||
"norm_text": norm_text_batch
|
|
||||||
}
|
|
||||||
_data.append(batch)
|
|
||||||
|
|
||||||
return _data, batch_index_list
|
|
||||||
|
|
||||||
|
bert_max_len = max(bert_max_len, all_bert_features.shape[-1])
|
||||||
|
phones_max_len = max(phones_max_len, phones.shape[-1])
|
||||||
|
|
||||||
|
phones_list.append(phones)
|
||||||
|
phones_len_list.append(phones.shape[-1])
|
||||||
|
all_phones_list.append(all_phones)
|
||||||
|
all_phones_len_list.append(all_phones.shape[-1])
|
||||||
|
all_bert_features_list.append(all_bert_features)
|
||||||
|
norm_text_batch.append(item["norm_text"])
|
||||||
|
|
||||||
|
phones_batch = phones_list
|
||||||
|
all_phones_batch = all_phones_list
|
||||||
|
all_bert_features_batch = all_bert_features_list
|
||||||
|
|
||||||
|
|
||||||
|
# max_len = max(bert_max_len, phones_max_len)
|
||||||
|
# phones_batch = self.batch_sequences(phones_list, axis=0, pad_value=0, max_length=max_len)
|
||||||
|
#### 直接对phones和bert_features进行pad,会增大复读概率。
|
||||||
|
# all_phones_batch = self.batch_sequences(all_phones_list, axis=0, pad_value=0, max_length=max_len)
|
||||||
|
# all_bert_features_batch = all_bert_features_list
|
||||||
|
# all_bert_features_batch = torch.zeros(len(item_list), 1024, max_len, dtype=precision, device=device)
|
||||||
|
# for idx, item in enumerate(all_bert_features_list):
|
||||||
|
# all_bert_features_batch[idx, :, : item.shape[-1]] = item
|
||||||
|
|
||||||
|
# #### 先对phones进行embedding、对bert_features进行project,再pad到相同长度,以缓解复读问题。(可能还有其他因素导致复读)
|
||||||
|
# all_phones_list = [self.t2s_model.model.ar_text_embedding(item.to(self.t2s_model.device)) for item in all_phones_list]
|
||||||
|
# all_phones_list = [F.pad(item,(0,0,0,max_len-item.shape[0]),value=0) for item in all_phones_list]
|
||||||
|
# all_phones_batch = torch.stack(all_phones_list, dim=0)
|
||||||
|
|
||||||
|
# all_bert_features_list = [self.t2s_model.model.bert_proj(item.to(self.t2s_model.device).transpose(0, 1)) for item in all_bert_features_list]
|
||||||
|
# all_bert_features_list = [F.pad(item,(0,0,0,max_len-item.shape[0]), value=0) for item in all_bert_features_list]
|
||||||
|
# all_bert_features_batch = torch.stack(all_bert_features_list, dim=0)
|
||||||
|
|
||||||
|
batch = {
|
||||||
|
"phones": phones_batch,
|
||||||
|
"phones_len": torch.LongTensor(phones_len_list).to(device),
|
||||||
|
"all_phones": all_phones_batch,
|
||||||
|
"all_phones_len": torch.LongTensor(all_phones_len_list).to(device),
|
||||||
|
"all_bert_features": all_bert_features_batch,
|
||||||
|
"norm_text": norm_text_batch
|
||||||
|
}
|
||||||
|
_data.append(batch)
|
||||||
|
|
||||||
|
return _data, batch_index_list
|
||||||
|
|
||||||
def recovery_order(self, data:list, batch_index_list:list)->list:
|
def recovery_order(self, data:list, batch_index_list:list)->list:
|
||||||
'''
|
'''
|
||||||
Recovery the order of the audio according to the batch_index_list.
|
Recovery the order of the audio according to the batch_index_list.
|
||||||
@ -566,7 +568,8 @@ class TTS:
|
|||||||
Stop the inference process.
|
Stop the inference process.
|
||||||
'''
|
'''
|
||||||
self.stop_flag = True
|
self.stop_flag = True
|
||||||
|
|
||||||
|
|
||||||
def run(self, inputs:dict):
|
def run(self, inputs:dict):
|
||||||
"""
|
"""
|
||||||
Text to speech inference.
|
Text to speech inference.
|
||||||
@ -850,7 +853,7 @@ class TTS:
|
|||||||
raise e
|
raise e
|
||||||
finally:
|
finally:
|
||||||
self.empty_cache()
|
self.empty_cache()
|
||||||
|
|
||||||
def empty_cache(self):
|
def empty_cache(self):
|
||||||
try:
|
try:
|
||||||
if "cuda" in str(self.configs.device):
|
if "cuda" in str(self.configs.device):
|
||||||
@ -859,7 +862,7 @@ class TTS:
|
|||||||
torch.mps.empty_cache()
|
torch.mps.empty_cache()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def audio_postprocess(self,
|
def audio_postprocess(self,
|
||||||
audio:List[torch.Tensor],
|
audio:List[torch.Tensor],
|
||||||
sr:int,
|
sr:int,
|
||||||
@ -873,32 +876,36 @@ class TTS:
|
|||||||
dtype=self.precision,
|
dtype=self.precision,
|
||||||
device=self.configs.device
|
device=self.configs.device
|
||||||
)
|
)
|
||||||
|
|
||||||
for i, batch in enumerate(audio):
|
for i, batch in enumerate(audio):
|
||||||
for j, audio_fragment in enumerate(batch):
|
for j, audio_fragment in enumerate(batch):
|
||||||
max_audio=torch.abs(audio_fragment).max()#简单防止16bit爆音
|
max_audio=torch.abs(audio_fragment).max()#简单防止16bit爆音
|
||||||
if max_audio>1: audio_fragment/=max_audio
|
if max_audio>1: audio_fragment/=max_audio
|
||||||
audio_fragment:torch.Tensor = torch.cat([audio_fragment, zero_wav], dim=0)
|
audio_fragment:torch.Tensor = torch.cat([audio_fragment, zero_wav], dim=0)
|
||||||
audio[i][j] = audio_fragment.cpu().numpy()
|
audio[i][j] = audio_fragment.cpu().numpy()
|
||||||
|
|
||||||
|
|
||||||
if split_bucket:
|
if split_bucket:
|
||||||
audio = self.recovery_order(audio, batch_index_list)
|
audio = self.recovery_order(audio, batch_index_list)
|
||||||
else:
|
else:
|
||||||
# audio = [item for batch in audio for item in batch]
|
# audio = [item for batch in audio for item in batch]
|
||||||
audio = sum(audio, [])
|
audio = sum(audio, [])
|
||||||
|
|
||||||
|
|
||||||
audio = np.concatenate(audio, 0)
|
audio = np.concatenate(audio, 0)
|
||||||
audio = (audio * 32768).astype(np.int16)
|
audio = (audio * 32768).astype(np.int16)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if speed_factor != 1.0:
|
if speed_factor != 1.0:
|
||||||
audio = speed_change(audio, speed=speed_factor, sr=int(sr))
|
audio = speed_change(audio, speed=speed_factor, sr=int(sr))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Failed to change speed of audio: \n{e}")
|
print(f"Failed to change speed of audio: \n{e}")
|
||||||
|
|
||||||
return sr, audio
|
return sr, audio
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def speed_change(input_audio:np.ndarray, speed:float, sr:int):
|
def speed_change(input_audio:np.ndarray, speed:float, sr:int):
|
||||||
# 将 NumPy 数组转换为原始 PCM 流
|
# 将 NumPy 数组转换为原始 PCM 流
|
||||||
raw_audio = input_audio.astype(np.int16).tobytes()
|
raw_audio = input_audio.astype(np.int16).tobytes()
|
||||||
@ -918,4 +925,4 @@ def speed_change(input_audio:np.ndarray, speed:float, sr:int):
|
|||||||
# 将管道输出解码为 NumPy 数组
|
# 将管道输出解码为 NumPy 数组
|
||||||
processed_audio = np.frombuffer(out, np.int16)
|
processed_audio = np.frombuffer(out, np.int16)
|
||||||
|
|
||||||
return processed_audio
|
return processed_audio
|
Loading…
x
Reference in New Issue
Block a user