mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
26 lines
710 B
Python
26 lines
710 B
Python
import torch
|
||
|
||
|
||
def get_model():
    """Load the Whisper "small" checkpoint on the CPU and return its encoder.

    ``whisper`` is imported inside the function, so the package is only
    needed once a model is actually requested.

    Returns:
        The ``encoder`` attribute of the loaded Whisper model.
    """
    from whisper import load_model

    return load_model("small", device="cpu").encoder
|
||
|
||
|
||
def get_content(model=None, wav_16k_tensor=None):
    """Run a Whisper encoder over one audio clip and return its features.

    Both arguments are required in practice even though they default to
    ``None``: the model is queried for its parameters and the waveform is
    handed straight to ``whisper.log_mel_spectrogram``.

    Args:
        model: Encoder module to call on the padded mel spectrogram
            (presumably the object returned by ``get_model`` -- confirm
            against callers).
        wav_16k_tensor: Waveform passed to ``log_mel_spectrogram``; the name
            suggests 16 kHz audio -- TODO confirm.

    Returns:
        The encoder output restricted to the first batch item and the first
        ``n_frames // 2`` positions of its second axis, with the last two
        axes swapped.

    Raises:
        AssertionError: If the spectrogram has 3000 or more frames, i.e. the
            clip is too long. Raised explicitly (not via ``assert``) so the
            check is still enforced when Python runs with ``-O``.
    """
    from whisper import log_mel_spectrogram, pad_or_trim

    # Frame count the spectrogram is padded to before it is fed to the model.
    max_frames = 3000

    device = next(model.parameters()).device
    mel = log_mel_spectrogram(wav_16k_tensor).to(device)[:, :max_frames]

    # The slice above caps the length at max_frames, so reaching the cap
    # means the input was at least that long and must be rejected.
    n_frames = mel.shape[-1]
    if n_frames >= max_frames:
        raise AssertionError("输入音频过长,只允许输入30以内音频")

    # Only the part of the output that corresponds to real (unpadded) input
    # is kept; presumably the encoder halves the time resolution -- confirm.
    feature_len = n_frames // 2

    with torch.no_grad():
        encoded = model(pad_or_trim(mel, max_frames).unsqueeze(0))

    return encoded[:1, :feature_len, :].transpose(1, 2)
|