使用librosa加载音频避免ffmpeg.probe读取metadata的错误

如题,有用户称在读取音频的metadata时出现问题,这可能是ffprobe造成的。部分站点存在写入不合规metadata的情况(如一些直播站点将直播间信息写入metadata,其中包含emoji等乱七八糟的信息),这会导致ffmpeg.probe在读取时出错。
考虑到ffmpeg的兼容性和性能比librosa更好,或许在导入前先对metadata做合规性处理会更好?
但是看到后面注释部分很多地方还是使用librosa实现,所以暂且认为还不用考虑兼容性问题。
This commit is contained in:
KakaruHayate 2024-01-20 12:09:21 +08:00 committed by GitHub
parent ca8800f312
commit 240e0e289b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -5,7 +5,8 @@ from tools.i18n.i18n import I18nAuto
i18n = I18nAuto()
logger = logging.getLogger(__name__)
import ffmpeg
import librosa
import soundfile as sf
import torch
import sys
from mdxnet import MDXNetDereverb
@ -53,16 +54,17 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
need_reformat = 1
done = 0
try:
info = ffmpeg.probe(inp_path, cmd="ffprobe")
if (
info["streams"][0]["channels"] == 2
and info["streams"][0]["sample_rate"] == "44100"
):
y, sr = librosa.load(inp_path, sr=None)
info = sf.info(inp_path)
channels = info.channels
if channels == 2 and sr == 44100:
need_reformat = 0
pre_fun._path_audio_(
inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
)
done = 1
else:
need_reformat = 1
except:
need_reformat = 1
traceback.print_exc()
@ -71,10 +73,8 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
os.path.join(os.environ["TEMP"]),
os.path.basename(inp_path),
)
os.system(
"ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
% (inp_path, tmp_path)
)
y_resampled = librosa.resample(y, sr, 44100)
sf.write(tmp_path, y_resampled, 44100, "PCM_16")
inp_path = tmp_path
try:
if done == 0:
@ -181,4 +181,4 @@ app.queue(concurrency_count=511, max_size=1022).launch(
inbrowser=True,
server_port=9873,
quiet=True,
)
)