From 878fef248ad56c827e3b5d320a8ce3dcb703b270 Mon Sep 17 00:00:00 2001 From: Downupanddownup Date: Fri, 26 Apr 2024 14:16:16 +0800 Subject: [PATCH] =?UTF-8?q?bug=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ref_audio_selector_webui.py | 2 +- Ref_Audio_Selector/tool/audio_inference.py | 26 +++++-- Ref_Audio_Selector/tool/audio_similarity.py | 2 +- .../tool/text_comparison/asr_text_process.py | 26 ++++++- .../tool/text_comparison/text_comparison.py | 70 +++++++++++++++++++ 5 files changed, 114 insertions(+), 12 deletions(-) diff --git a/Ref_Audio_Selector/ref_audio_selector_webui.py b/Ref_Audio_Selector/ref_audio_selector_webui.py index 0a12785..e3cead2 100644 --- a/Ref_Audio_Selector/ref_audio_selector_webui.py +++ b/Ref_Audio_Selector/ref_audio_selector_webui.py @@ -289,7 +289,7 @@ def text_similarity_analysis(text_work_space_dir, text_role, return i18n(text_text_similarity_analysis_info) -def open_text_similarity_analysis(asr_file_path, output_dir, similarity_enlarge_boundary=0.8): +def open_text_similarity_analysis(asr_file_path, output_dir, similarity_enlarge_boundary=0.9): global p_text_similarity if p_text_similarity is None: cmd = f'"{python_exec}" Ref_Audio_Selector/tool/text_comparison/asr_text_process.py ' diff --git a/Ref_Audio_Selector/tool/audio_inference.py b/Ref_Audio_Selector/tool/audio_inference.py index a5cc7e5..39cc2c6 100644 --- a/Ref_Audio_Selector/tool/audio_inference.py +++ b/Ref_Audio_Selector/tool/audio_inference.py @@ -85,19 +85,15 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path) emotion_subdir = os.path.join(output_dir, params.inference_audio_emotion_aggregation_dir) os.makedirs(emotion_subdir, exist_ok=True) + all_count = len(text_list) * len(emotion_list) + has_generated_count = 0 + # 计算笛卡尔积 cartesian_product = list(itertools.product(text_list, emotion_list)) for text, emotion in cartesian_product: # Generate audio byte stream using the create_audio function - if url_composer.is_emotion(): - real_url = url_composer.build_url_with_emotion(text, emotion['emotion']) - else: - real_url = url_composer.build_url_with_ref(text, emotion['ref_path'], emotion['ref_text']) - - audio_bytes = inference_audio_from_api(real_url) - emotion_name = emotion['emotion'] text_subdir_text = os.path.join(text_subdir, text) @@ -108,12 +104,28 @@ def generate_audio_files(url_composer, text_list, emotion_list, output_dir_path) os.makedirs(emotion_subdir_emotion, exist_ok=True) emotion_subdir_emotion_file_path = os.path.join(emotion_subdir_emotion, text + '.wav') + # 检查是否已经存在对应的音频文件,如果存在则跳过 + if os.path.exists(text_subdir_text_file_path) and os.path.exists(emotion_subdir_emotion_file_path): + has_generated_count += 1 + print(f"进度: {has_generated_count}/{all_count}") + continue + + if url_composer.is_emotion(): + real_url = url_composer.build_url_with_emotion(text, emotion['emotion']) + else: + real_url = url_composer.build_url_with_ref(text, emotion['ref_path'], emotion['ref_text']) + + audio_bytes = inference_audio_from_api(real_url) + # Write audio bytes to the respective files with open(text_subdir_text_file_path, 'wb') as f: f.write(audio_bytes) with open(emotion_subdir_emotion_file_path, 'wb') as f: f.write(audio_bytes) + has_generated_count += 1 + print(f"进度: {has_generated_count}/{all_count}") + def inference_audio_from_api(url): # 发起GET请求 diff --git a/Ref_Audio_Selector/tool/audio_similarity.py b/Ref_Audio_Selector/tool/audio_similarity.py index 3c6fe1c..658251f 100644 --- a/Ref_Audio_Selector/tool/audio_similarity.py +++ b/Ref_Audio_Selector/tool/audio_similarity.py @@ -142,7 +142,7 @@ def copy_and_move(output_audio_directory, similarity_scores): for item in similarity_scores: # 构造新的文件名 base_name = os.path.basename(item['wav_path'])[:-4] # 去掉.wav扩展名 - new_name = f"{item['score']}-{base_name}.wav" + new_name = f"{item['score']*10000:04.0f}-{base_name}.wav" # 新文件的完整路径 new_path = os.path.join(output_audio_directory, new_name) diff --git a/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py b/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py index 486391e..585df90 100644 --- a/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py +++ b/Ref_Audio_Selector/tool/text_comparison/asr_text_process.py @@ -33,10 +33,16 @@ def parse_asr_file(file_path): return output +@timeit_decorator def calculate_similarity_and_append_to_list(input_list, boundary): + all_count = len(input_list) + has_been_processed_count = 0 for item in input_list: - _, similarity_score = text_comparison.calculate_result(item['original_text'], item['asr_text'], boundary) + original_score, similarity_score = text_comparison.calculate_result(item['original_text'], item['asr_text'], boundary) item['similarity_score'] = similarity_score + item['original_score'] = original_score + has_been_processed_count += 1 + print(f'进度:{has_been_processed_count}/{all_count}') return input_list @@ -79,13 +85,27 @@ def group_and_sort_by_field(data, group_by_field): def format_list_to_text(data_list, output_filename): with open(output_filename, 'w', encoding='utf-8') as output_file: + output_file.write('放大后的相似度分值|原始分值|ASR文本|原文文本\n') for key, items in data_list: # 写入情绪标题 output_file.write(key + '\n') # 写入每条记录 for item in items: - formatted_line = f"{item['similarity_score']}|{item['original_text']}|{item['asr_text']}\n" + formatted_line = f"{item['similarity_score']}|{item['original_score']}|{item['asr_text']}|{item['original_text']}\n" + output_file.write(formatted_line) + + +def format_list_to_emotion(data_list, output_filename): + with open(output_filename, 'w', encoding='utf-8') as output_file: + output_file.write('放大后的相似度分值|原始分值|ASR文本|情绪类型\n') + for key, items in data_list: + # 写入情绪标题 + output_file.write(key + '\n') + + # 写入每条记录 + for item in items: + formatted_line = f"{item['similarity_score']}|{item['original_score']}|{item['asr_text']}|{item['emotion']}\n" output_file.write(formatted_line) @@ -113,7 +133,7 @@ def process(asr_file_path, output_dir, similarity_enlarge_boundary): original_text_detail_list = group_and_sort_by_field(records, 'original_text') original_text_detail_file = os.path.join(output_dir, f'{params.text_similarity_by_text_detail_filename}.txt') - format_list_to_text(original_text_detail_list, original_text_detail_file) + format_list_to_emotion(original_text_detail_list, original_text_detail_file) print('文本相似度分析完成。') diff --git a/Ref_Audio_Selector/tool/text_comparison/text_comparison.py b/Ref_Audio_Selector/tool/text_comparison/text_comparison.py index 5a33776..edb0c7d 100644 --- a/Ref_Audio_Selector/tool/text_comparison/text_comparison.py +++ b/Ref_Audio_Selector/tool/text_comparison/text_comparison.py @@ -51,3 +51,73 @@ def calculate_result(t1, t2, boundary): return similarity_score2, adjusted_similarity_score2 +def print_result(t1, t2, boundary): + print(f't2: {t2}') + # 计算并打印相似度 + similarity_score2 = calculate_similarity(t1, t2) + print(f"两句话的相似度为: {similarity_score2:.4f}") + + # 调整相似度 + adjusted_similarity_score2 = adjusted_similarity(similarity_score2, boundary) + print(f"调整后的相似度为: {adjusted_similarity_score2:.4f}") + + +def test(boundary): + # 原始文本 + text1 = "这是第一个句子" + list = """ + 这是第一个句子 + 这是第二个句子。 + 那么,这是第三个表达。 + 当前呈现的是第四个句子。 + 接下来,我们有第五句话。 + 在此,展示第六条陈述。 + 继续下去,这是第七个短句。 + 不容忽视的是第八个表述。 + 顺延着序列,这是第九句。 + 此处列举的是第十个说法。 + 进入新的篇章,这是第十一个句子。 + 下一段内容即为第十二个句子。 + 显而易见,这是第十三个叙述。 + 渐进地,我们来到第十四句话。 + 向下滚动,您会看到第十五个表达。 + 此刻,呈现在眼前的是第十六个句子。 + 它们中的一个——第十七个句子在此。 + 如同链条般连接,这是第十八个断言。 + 按照顺序排列,接下来是第十九个话语。 + 逐一列举,这是第二十个陈述句。 + 结构相似,本例给出第二十一个实例句。 + 这是最初的陈述句。 + 首先表达的是这一个句子。 + 第一句内容即为此处所示。 + 这是起始的叙述段落。 + 开篇所展示的第一句话就是这个。 + 明媚的阳光洒满大地 + 窗外飘落粉色樱花瓣 + 笔尖轻触纸面思绪万千 + 深夜的月光如水般静谧 + 穿越丛林的小径蜿蜒曲折 + 浅酌清茶品味人生百态 + 破晓时分雄鸡一唱天下白 + 草原上奔驰的骏马无拘无束 + 秋叶纷飞描绘季节更替画卷 + 寒冬雪夜炉火旁围坐共话家常 + kszdRjYXw + pfsMgTlVHnB + uQaGxIbWz + ZtqNhPmKcOe + jfyrXsStVUo + wDiEgLkZbn + yhNvAfUmqC + TpKjxMrWgs + eBzHUaFJtYd + oQnXcVSiPkL + 00000 + """ + list2 = list.strip().split('\n') + for item in list2: + print_result(text1, item, boundary) + + +if __name__ == '__main__': + test(0.9)