mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
音频相似度比较,添加参考音频的预采样步骤
This commit is contained in:
parent
684e1cfd2f
commit
ca9ffbf98e
@ -7,6 +7,8 @@ time_log_print_type = file
|
||||
time_log_print_dir = Ref_Audio_Selector/log/performance
|
||||
# 参考音频目录
|
||||
reference_audio_dir = refer_audio
|
||||
# 临时文件目录
|
||||
temp_dir = Ref_Audio_Selector/temp
|
||||
|
||||
[AudioSample]
|
||||
# list转换待选参考音频目录
|
||||
|
@ -9,6 +9,8 @@ time_log_print_type = config.get_base('time_log_print_type')
|
||||
time_log_print_dir = config.get_base('time_log_print_dir')
|
||||
# 参考音频目录
|
||||
reference_audio_dir = config.get_base('reference_audio_dir')
|
||||
# 临时文件目录
|
||||
temp_dir = config.get_base('temp_dir')
|
||||
|
||||
# [AudioSample]
|
||||
# list转换待选参考音频目录
|
||||
|
@ -1,5 +1,9 @@
|
||||
import argparse
|
||||
import os
|
||||
import soundfile as sf
|
||||
import torchaudio
|
||||
import torchaudio.transforms as T
|
||||
import Ref_Audio_Selector.config_param.config_params as params
|
||||
from Ref_Audio_Selector.common.time_util import timeit_decorator
|
||||
|
||||
from modelscope.pipelines import pipeline
|
||||
@ -17,10 +21,13 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
|
||||
comparison_audio_paths = [os.path.join(comparison_dir_path, f) for f in os.listdir(comparison_dir_path) if
|
||||
f.endswith('.wav')]
|
||||
|
||||
# 因为这个模型是基于16k音频数据训练的,为了避免后续比较时,每次都对参考音频进行重采样,所以,提前进行了采样
|
||||
reference_audio_16k = ensure_16k_wav(reference_audio_path)
|
||||
|
||||
# Step 2: 用参考音频依次比较音频目录下的每个音频,获取相似度分数及对应路径
|
||||
similarity_scores = []
|
||||
for audio_path in comparison_audio_paths:
|
||||
score = sv_pipeline([reference_audio_path, audio_path])['score']
|
||||
score = sv_pipeline([reference_audio_16k, audio_path])['score']
|
||||
similarity_scores.append({
|
||||
'score': score,
|
||||
'path': audio_path
|
||||
@ -42,6 +49,56 @@ def compare_audio_and_generate_report(reference_audio_path, comparison_dir_path,
|
||||
f.write(content)
|
||||
|
||||
|
||||
def ensure_16k_wav(audio_file_path, target_sample_rate=16000):
|
||||
# 读取音频文件信息
|
||||
sample_rate, audio_data = sf.read(audio_file_path)
|
||||
|
||||
# 检查采样率是否为16kHz
|
||||
if sample_rate == target_sample_rate:
|
||||
# 是16kHz采样率,直接返回原始文件路径
|
||||
return audio_file_path
|
||||
|
||||
# 设置临时文件名
|
||||
temp_file_path = os.path.join(params.temp_dir, os.path.basename(audio_file_path))
|
||||
|
||||
# 重采样至16kHz并保存到临时文件
|
||||
sf.write(temp_file_path, audio_data, samplerate=target_sample_rate, format="WAV")
|
||||
|
||||
return temp_file_path
|
||||
|
||||
|
||||
def ensure_16k_wav_2(audio_file_path, target_sample_rate=16000):
|
||||
"""
|
||||
输入一个音频文件地址,判断其采样率并决定是否进行重采样,然后将结果保存到指定的输出文件。
|
||||
|
||||
参数:
|
||||
audio_file_path (str): 音频文件路径。
|
||||
output_file_path (str): 保存重采样后音频数据的目标文件路径。
|
||||
target_sample_rate (int, optional): 目标采样率,默认为16000Hz。
|
||||
"""
|
||||
# 读取音频文件并获取其采样率
|
||||
waveform, sample_rate = torchaudio.load(audio_file_path)
|
||||
|
||||
# 判断是否需要重采样
|
||||
if sample_rate == target_sample_rate:
|
||||
return audio_file_path
|
||||
else:
|
||||
|
||||
# 创建Resample实例
|
||||
resampler = T.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
|
||||
|
||||
# 应用重采样
|
||||
resampled_waveform = resampler(waveform)
|
||||
|
||||
# 设置临时文件名
|
||||
temp_file_path = os.path.join(params.temp_dir, os.path.basename(audio_file_path))
|
||||
|
||||
# 保存重采样后的音频到指定文件
|
||||
torchaudio.save(temp_file_path, resampled_waveform, target_sample_rate)
|
||||
|
||||
return temp_file_path
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser(description="Audio processing script arguments")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user