音频相似度比较,添加参考音频的预采样步骤

This commit is contained in:
Downupanddownup 2024-04-26 15:00:34 +08:00
parent 684e1cfd2f
commit ca9ffbf98e
3 changed files with 62 additions and 1 deletions

View File

@ -7,6 +7,8 @@ time_log_print_type = file
time_log_print_dir = Ref_Audio_Selector/log/performance
# 参考音频目录
reference_audio_dir = refer_audio
# 临时文件目录
temp_dir = Ref_Audio_Selector/temp
[AudioSample]
# list转换待选参考音频目录

View File

@ -9,6 +9,8 @@ time_log_print_type = config.get_base('time_log_print_type')
time_log_print_dir = config.get_base('time_log_print_dir')
# 参考音频目录
reference_audio_dir = config.get_base('reference_audio_dir')
# 临时文件目录
temp_dir = config.get_base('temp_dir')
# [AudioSample]
# list转换待选参考音频目录

View File

@ -1,5 +1,9 @@
import argparse
import os
import soundfile as sf
import torchaudio
import torchaudio.transforms as T
import Ref_Audio_Selector.config_param.config_params as params
from Ref_Audio_Selector.common.time_util import timeit_decorator
from modelscope.pipelines import pipeline
@ -17,10 +21,13 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if
f.endswith('.wav')]
# The model was trained on 16 kHz audio, so the reference audio is resampled once up front to avoid resampling it again on every comparison.
reference_audio_16k = ensure_16k_wav(reference_audio_path)
# Step 2: 用参考音频依次比较音频目录下的每个音频,获取相似度分数及对应路径
similarity_scores = []
for audio_path in comparison_audio_paths:
score = sv_pipeline([reference_audio_path, audio_path])['score']
score = sv_pipeline([reference_audio_16k, audio_path])['score']
similarity_scores.append({
'score': score,
'path': audio_path
@ -42,6 +49,56 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
f.write(content)
def ensure_16k_wav(audio_file_path, target_sample_rate=16000):
    """Return the path of a copy of the audio at ``target_sample_rate``.

    If the file already has the target sample rate, its own path is returned
    unchanged. Otherwise the audio is resampled and written as a WAV file
    into ``params.temp_dir`` under the same base name, and that path is
    returned.

    Args:
        audio_file_path: Path of the audio file to check.
        target_sample_rate: Desired sample rate in Hz. Defaults to 16000.

    Returns:
        Path of an audio file whose sample rate equals ``target_sample_rate``.
    """
    # Only the header is needed to decide whether resampling is required.
    if sf.info(audio_file_path).samplerate == target_sample_rate:
        return audio_file_path

    # Local import: torch is already a hard requirement of torchaudio.
    import torch

    # sf.read returns (data, samplerate) -- in that order. With
    # always_2d=True the data is shaped (frames, channels).
    audio_data, sample_rate = sf.read(
        audio_file_path, dtype="float32", always_2d=True
    )

    # torchaudio transforms operate on (channels, frames) tensors.
    waveform = torch.from_numpy(audio_data).t().contiguous()
    resampler = T.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
    resampled = resampler(waveform)

    # The temp directory comes from configuration and may not exist yet.
    os.makedirs(params.temp_dir, exist_ok=True)
    temp_file_path = os.path.join(
        params.temp_dir, os.path.basename(audio_file_path)
    )

    # Writing the *resampled* samples is what makes the new header rate true;
    # writing the original samples with a different rate would only change
    # the playback speed and pitch.
    sf.write(
        temp_file_path,
        resampled.t().contiguous().numpy(),
        samplerate=target_sample_rate,
        format="WAV",
    )
    return temp_file_path
def ensure_16k_wav_2(audio_file_path, target_sample_rate=16000):
    """Return the path of a copy of the audio at ``target_sample_rate``.

    The file is loaded with torchaudio. If its sample rate already matches
    the target, the original path is returned unchanged. Otherwise the
    waveform is resampled and saved into ``params.temp_dir`` under the same
    base name, and that path is returned.

    Args:
        audio_file_path (str): Path of the audio file to check.
        target_sample_rate (int, optional): Desired sample rate in Hz.
            Defaults to 16000.

    Returns:
        str: Path of an audio file whose sample rate equals
        ``target_sample_rate``.
    """
    # Load the waveform together with its native sample rate.
    waveform, sample_rate = torchaudio.load(audio_file_path)

    # Nothing to do when the file is already at the target rate.
    if sample_rate == target_sample_rate:
        return audio_file_path

    # Resample to the target rate.
    resampler = T.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
    resampled_waveform = resampler(waveform)

    # The temp directory comes from configuration and may not exist yet.
    os.makedirs(params.temp_dir, exist_ok=True)
    temp_file_path = os.path.join(
        params.temp_dir, os.path.basename(audio_file_path)
    )

    # Save the resampled audio under the original file name.
    torchaudio.save(temp_file_path, resampled_waveform, target_sample_rate)
    return temp_file_path
def parse_arguments():
parser = argparse.ArgumentParser(description="Audio processing script arguments")