def _concat_parts(parts):
    """Return the single part unchanged, or the concatenation of several parts."""
    return parts[0] if len(parts) == 1 else np.concatenate(parts)


def merge_short_chunks(chunks, max_duration, rate, gap_duration=0.1):
    """Greedily merge consecutive audio chunks into longer segments.

    Chunks are taken in order and appended to the current segment, separated
    by a short stretch of silence, as long as the segment (silence included)
    stays within ``max_duration``. When the next chunk would not fit, the
    current segment is emitted and that chunk starts a new one, so every
    input chunk ends up in exactly one output segment and none are dropped.

    Args:
        chunks: Sequence of audio chunks. ``len(chunk)`` is treated as the
            number of samples, i.e. chunks are assumed to be 1-D sample
            arrays (NOTE(review): confirm against the slicer's output for
            multi-channel audio).
        max_duration: Maximum length of a merged segment, in seconds.
        rate: Sample rate in samples per second.
        gap_duration: Length of the silence inserted between two merged
            chunks, in seconds. Defaults to 0.1.

    Returns:
        A list of segments. A chunk that was not merged with anything is
        returned as-is; merged segments are new arrays. A single chunk that
        is already longer than ``max_duration`` is passed through unsplit.

    Raises:
        ValueError: If ``gap_duration`` is negative.
    """
    if not chunks:
        return []
    if gap_duration < 0:
        raise ValueError("gap_duration must be non-negative")

    max_length = int(max_duration * rate)  # sample counts must be integers
    gap_length = int(gap_duration * rate)

    merged = []
    # Collect the pieces of the current segment and concatenate once when it
    # is complete, instead of re-copying a growing array on every merge.
    parts = [chunks[0]]
    length = len(chunks[0])

    for chunk in chunks[1:]:
        # The inserted silence counts towards the limit; otherwise merged
        # segments could exceed max_duration.
        grown = length + gap_length + len(chunk)
        if grown <= max_length:
            # Match the chunk's dtype: np.zeros defaults to float64, which
            # would upcast the whole merged segment.
            parts.append(np.zeros(gap_length, dtype=np.asarray(chunk).dtype))
            parts.append(chunk)
            length = grown
        else:
            merged.append(_concat_parts(parts))
            parts, length = [chunk], len(chunk)  # start a new segment

    merged.append(_concat_parts(parts))  # flush the final segment
    return merged