From ddb31547a2980807aa0bd25ea742520b9c3fd5b8 Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 15:15:36 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E5=B0=8F=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=8A=A0=E5=90=88=E5=B9=B6List=E5=92=8C?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=A4=B9=E7=95=8C=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/i18n/locale_diff.py | 2 +- tools/srt_slicer/i18n/locale/en_US.json | 11 ++ tools/srt_slicer/i18n/locale/zh_CN.json | 11 ++ tools/srt_slicer/srt_utils.py | 42 +++++- tools/srt_slicer/webui.py | 182 ++++++++++++++++-------- 5 files changed, 184 insertions(+), 64 deletions(-) diff --git a/tools/i18n/locale_diff.py b/tools/i18n/locale_diff.py index 5fb505d8..327cb1ed 100644 --- a/tools/i18n/locale_diff.py +++ b/tools/i18n/locale_diff.py @@ -3,7 +3,7 @@ import os from collections import OrderedDict # dir_path = "./i18n/locale" # The path to the i18n locale directory, you can change it to your own path -dir_path = "./Inference/i18n/locale" +dir_path = "./tools/srt_slicer/i18n/locale" # Define the standard file name standard_file = os.path.join(dir_path, "zh_CN.json") diff --git a/tools/srt_slicer/i18n/locale/en_US.json b/tools/srt_slicer/i18n/locale/en_US.json index b1bf423e..415d36d2 100644 --- a/tools/srt_slicer/i18n/locale/en_US.json +++ b/tools/srt_slicer/i18n/locale/en_US.json @@ -1,9 +1,13 @@ { + "List 合并小工具": "List Merge Tool", "SRT合并切分插件": "SRT Merge and Split Plugin", "SRT文件": "SRT File", + "SRT编辑界面": "SRT Edit Interface", "srt文件内容": "SRT File Content", "上传SRT文件": "Upload SRT File", "上传文件": "Upload Files", + "两个文件夹不能相同!!!": "The two folders cannot be the same!!!", + "主文件夹": "Main Folder", "作者: ": "Author: ", "使用方法": "How to Use", "保存合并后字幕": "Save Merged Subtitles", @@ -12,15 +16,19 @@ "允许最短长度": "Minimum Allowed Length", "内容预览": "Content Preview", "切分与保存": "Split and Save", + "切分完成": "Split Completed", "切分并保存音频、list": "Split and Save Audio, List", "切分预览": "Split Preview", "判定为短间隔时长": "Judged as Short Interval Duration", + "到": " to ", "前置保留时间": "Preceding Retention Time", "前置添加静音时间": "Prepend Silence Time", "句末加句号": "Add Period at the End of Sentence", "合并后srt文本": "Merged SRT Text", + "合并后的List": "Merged List", "合并字幕": "Merge Subtitles", "合并字幕设置": "Subtitle Merge Settings", + "合并文件夹与List": "Merge Folder and List", "后置保留时间": "Following Retention Time", "后置添加静音时间": "Append Silence Time", "扫描文件夹": "Scan Folder", @@ -33,8 +41,11 @@ "最大间隔时间": "Maximum Interval Time", "最长允许单句长度": "Maximum Allowed Sentence Length", "根据面板合并短句并过滤你不希望出现的句子。": "Merge short sentences according to the panel and filter out sentences you do not want to appear.", + "次文件夹": "Second Folder", + "正在切分音频": "Splitting Audio", "正在建设,敬请期待": "Under Construction, Stay Tuned", "注意:该文件夹已存在": "Warning: The folder already exists", + "角色名称,留空使用主文件夹的": "Role Name, Leave Blank to Use Main Folder's", "语言": "Language", "读取文件": "Read File", "读取本地文件": "Read Local File", diff --git a/tools/srt_slicer/i18n/locale/zh_CN.json b/tools/srt_slicer/i18n/locale/zh_CN.json index e235eab3..27567f00 100644 --- a/tools/srt_slicer/i18n/locale/zh_CN.json +++ b/tools/srt_slicer/i18n/locale/zh_CN.json @@ -1,9 +1,13 @@ { + "List 合并小工具": "List 合并小工具", "SRT合并切分插件": "SRT合并切分插件", "SRT文件": "SRT文件", + "SRT编辑界面": "SRT编辑界面", "srt文件内容": "srt文件内容", "上传SRT文件": "上传SRT文件", "上传文件": "上传文件", + "两个文件夹不能相同!!!": "两个文件夹不能相同!!!", + "主文件夹": "主文件夹", "作者: ": "作者: ", "使用方法": "使用方法", "保存合并后字幕": "保存合并后字幕", @@ -12,15 +16,19 @@ "允许最短长度": "允许最短长度", "内容预览": "内容预览", "切分与保存": "切分与保存", + "切分完成": "切分完成", "切分并保存音频、list": "切分并保存音频、list", "切分预览": "切分预览", "判定为短间隔时长": "判定为短间隔时长", + "到": "到", "前置保留时间": "前置保留时间", "前置添加静音时间": "前置添加静音时间", "句末加句号": "句末加句号", "合并后srt文本": "合并后srt文本", + "合并后的List": "合并后的List", "合并字幕": "合并字幕", "合并字幕设置": "合并字幕设置", + "合并文件夹与List": "合并文件夹与List", "后置保留时间": "后置保留时间", "后置添加静音时间": "后置添加静音时间", "扫描文件夹": "扫描文件夹", @@ -33,8 +41,11 @@ "最大间隔时间": "最大间隔时间", "最长允许单句长度": "最长允许单句长度", "根据面板合并短句并过滤你不希望出现的句子。": "根据面板合并短句并过滤你不希望出现的句子。", + "次文件夹": "次文件夹", + "正在切分音频": "正在切分音频", "正在建设,敬请期待": "正在建设,敬请期待", "注意:该文件夹已存在": "注意:该文件夹已存在", + "角色名称,留空使用主文件夹的": "角色名称,留空使用主文件夹的", "语言": "语言", "读取文件": "读取文件", "读取本地文件": "读取本地文件", diff --git a/tools/srt_slicer/srt_utils.py b/tools/srt_slicer/srt_utils.py index 6dd226d6..1f030d23 100644 --- a/tools/srt_slicer/srt_utils.py +++ b/tools/srt_slicer/srt_utils.py @@ -1,4 +1,5 @@ import srt +import shutil def parse_srt_with_lib(content): @@ -87,10 +88,47 @@ def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserv try: audio = pydub.AudioSegment.from_file(audio_path) sliced_audio = audio[int(start * 1000):int(end * 1000)] - file_name = f'{i + 1:03d}.{format}' + file_name = f'{character}_{i + 1:03d}.{format}' save_path = os.path.join(save_folder, file_name) sliced_audio.export(save_path, format=format) f.write(f"{file_name}|{character}|{language}|{subtitle.content}\n") except Exception as e: raise e - \ No newline at end of file + +def merge_list_folders(first_list_file, second_list_file, character, first_folder, second_folder): + merged_lines = [] + character1 = "" + filenames = set() + with open(first_list_file, 'r', encoding="utf-8") as f: + first_list = f.readlines() + for line in first_list: + filename, character1, language, content = line.split('|') + filenames.add(filename) + if character=="" or character is None: + character = character1 + new_line = f"{filename}|{character}|{language}|{content}" + merged_lines.append(new_line) + with open(second_list_file, 'r', encoding="utf-8") as f: + second_list = f.readlines() + for line in second_list: + filename, _, language, content = line.split('|') + orig_filename = filename + num = 1 + while filename in filenames: + filename = f"{filename.rsplit('.', 1)[0]}_{num}.{filename.rsplit('.', 1)[1]}" + num += 1 + try: + os.rename(os.path.join(second_folder, orig_filename), os.path.join(first_folder, filename)) + except Exception as e: + raise e + new_line = f"{filename}|{character}|{language}|{content}" + merged_lines.append(new_line) + os.remove(second_list_file) + if not os.listdir(second_folder): + os.rmdir(second_folder) + with open(first_list_file, 'w', encoding="utf-8") as f: + f.writelines(merged_lines) + return "\n".join(merged_lines) + + + \ No newline at end of file diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index 436a0484..bd586a6d 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -1,9 +1,19 @@ import gradio as gr import sys +import os +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) +sys.path.append(os.path.abspath(os.path.dirname(__file__))) sys.path.append('.') -sys.path.append('..') -from tools.srt_slicer.srt_utils import merge_subtitles_with_lib, parse_srt_with_lib, generate_srt_with_lib, slice_audio_with_lib, count_words_multilang +from srt_utils import ( + merge_subtitles_with_lib, + parse_srt_with_lib, + generate_srt_with_lib, + slice_audio_with_lib, + count_words_multilang, + merge_list_folders +) from i18n.i18n import I18nAuto @@ -19,7 +29,6 @@ def merge_srt(input_text, output_text, short_interval=0.1, max_interval=1, max_t return output_text - def slice_audio( input_audio, save_folder, @@ -46,6 +55,7 @@ def slice_audio( os.makedirs(character_folder, exist_ok=True) subtitles = parse_srt_with_lib(output_text) try: + gr.Info(f"{i18n('正在切分音频')} {input_audio} {i18n('到')} {character_folder}") slice_audio_with_lib( input_audio, character_folder, @@ -58,9 +68,10 @@ def slice_audio( language, character, ) + gr.Info(f"{i18n('切分完成')} ") except Exception as e: gr.Warning(f"Can't Slice, Error: {e}") - + def get_relative_path(path, base): return os.path.relpath(path, base) @@ -124,13 +135,13 @@ def load_srt_from_file(srt_file): return f.read() except: return "" - + def load_audio_from_file(audio_file): try: return gr.Audio(audio_file) except: return gr.Audio(value=None) - + def load_from_dropdown(input_folder, srt_files_list, audio_files_list): if isinstance(srt_files_list, str) and isinstance(audio_files_list, str): srt_file= os.path.join(input_folder, srt_files_list) @@ -151,7 +162,40 @@ def save_srt_to_file(srt_text, save_folder, character): srt_file = os.path.join(character_folder, "merged.srt") with open(srt_file, "w", encoding="utf-8") as f: f.write(srt_text) + +def scan_list_folders(folder): + if not os.path.exists(folder): + os.makedirs(folder, exist_ok=True) + list_folders = [] + for list_folder in os.listdir(folder): + if os.path.isdir(os.path.join(folder, list_folder)): + list_folders.append(get_relative_path(os.path.join(folder, list_folder), folder)) + first_list_folder = "" + second_list_folder = "" + if len(list_folders) > 0: + first_list_folder = second_list_folder = list_folders[0] + if len(list_folders) > 1: + second_list_folder = list_folders[1] + return gr.Dropdown(list_folders, value=first_list_folder), gr.Dropdown(list_folders, value=second_list_folder) + +def preview_merged_list(first_list_folder, second_list_folder, merge_list_character_name, save_folder): + if first_list_folder == "" or second_list_folder == "": + return "" + if first_list_folder == second_list_folder: + gr.Warning(i18n("两个文件夹不能相同!!!")) + return "" + first_list_folder = os.path.join(save_folder, first_list_folder) + second_list_folder = os.path.join(save_folder, second_list_folder) + print(f"first_list_folder: {first_list_folder}, second_list_folder: {second_list_folder}") + first_list = os.path.join(first_list_folder, [file for file in os.listdir(first_list_folder) if file.lower().endswith(".list")][0]) + second_list = os.path.join(second_list_folder, [file for file in os.listdir(second_list_folder) if file.lower().endswith(".list")][0]) + try: + return merge_list_folders(first_list, second_list, merge_list_character_name, first_list_folder, second_list_folder) + except Exception as e: + gr.Warning(f"Can't Merge, Error: {e}") + return "" + from datetime import datetime def change_character_name(input_audio): @@ -178,63 +222,79 @@ with gr.Blocks() as app:
  • {i18n("根据面板合并短句并过滤你不希望出现的句子。")}
  • {i18n("随后保存成切分好的音频与list文件。")}
  • """) - - - with gr.Row(): - with gr.Column(scale=2) as input_col: - with gr.Tabs(): - with gr.Tab(i18n("读取本地文件")): - input_folder = gr.Textbox("input/srt_and_audios", label=i18n("文件夹路径"),interactive=True) - scan_button = gr.Button(i18n("扫描文件夹"), variant="secondary",interactive=True) - srt_files_list = gr.Dropdown([], label=i18n("SRT文件"),interactive=True) - audio_files_list = gr.Dropdown([], label=i18n("音频文件"),interactive=True) - srt_read_button = gr.Button(i18n("读取文件"), variant="secondary",interactive=True) - with gr.Tab(i18n("上传文件")): - input_srt_file = gr.File(label=i18n("上传SRT文件"), type="filepath", file_types=["srt"]) - upload_audio = gr.Audio(type="filepath",label=i18n("音频文件")) - # input_audio_file = gr.File(label=i18n("上传音频文件"), type="audio", file_types=["mp3", "wav", "ogg"]) - with gr.Tabs(): - with gr.Tab(i18n("内容预览")): - input_audio = gr.Textbox("", label=i18n("音频文件"),interactive=False) - input_text = gr.Textbox("", lines=20, max_lines=30, label=i18n("srt文件内容")) - input_srt_file.change(load_srt_from_file, [input_srt_file], [input_text]) - with gr.Column(scale=1) as control_col: - with gr.Tabs(): - with gr.Tab(i18n("合并字幕设置")): - merge_zero_interval = gr.Checkbox(label=i18n("提前合并时间间隔很短的字幕"),interactive=True, value=True) - short_interval = gr.Slider(value=0.05, minimum=0, maximum=0.5, step=0.005, label=i18n("判定为短间隔时长"),interactive=True,visible=True) - max_interval = gr.Slider(value=0.8, minimum=0.1, maximum=10, step=0.1, label=i18n("最大间隔时间"),interactive=True) - max_text_length = gr.Slider(value=50,minimum=5,maximum=200,step=1, label=i18n("最长允许单句长度"),interactive=True) - add_period = gr.Checkbox(label=i18n("句末加句号"),interactive=True, value=True) - merge_button = gr.Button(i18n("合并字幕"), variant="primary") + with gr.Tabs(): + with gr.Tab(i18n("SRT编辑界面")): + with gr.Row(): + with gr.Column(scale=2) as input_col: + with gr.Tabs(): + with gr.Tab(i18n("读取本地文件")): + input_folder = gr.Textbox("input/srt_and_audios", label=i18n("文件夹路径"),interactive=True) + scan_button = gr.Button(i18n("扫描文件夹"), variant="secondary",interactive=True) + srt_files_list = gr.Dropdown([], label=i18n("SRT文件"),interactive=True) + audio_files_list = gr.Dropdown([], label=i18n("音频文件"),interactive=True) + srt_read_button = gr.Button(i18n("读取文件"), variant="secondary",interactive=True) + with gr.Tab(i18n("上传文件")): + input_srt_file = gr.File(label=i18n("上传SRT文件"), type="filepath", file_types=["srt"]) + upload_audio = gr.Audio(type="filepath",label=i18n("音频文件")) + # input_audio_file = gr.File(label=i18n("上传音频文件"), type="audio", file_types=["mp3", "wav", "ogg"]) + with gr.Tabs(): + with gr.Tab(i18n("内容预览")): + input_audio = gr.Textbox("", label=i18n("音频文件"),interactive=False) + input_text = gr.Textbox("", lines=20, max_lines=30, label=i18n("srt文件内容")) + input_srt_file.change(load_srt_from_file, [input_srt_file], [input_text]) + with gr.Column(scale=1) as control_col: + with gr.Tabs(): + with gr.Tab(i18n("合并字幕设置")): + merge_zero_interval = gr.Checkbox(label=i18n("提前合并时间间隔很短的字幕"),interactive=True, value=True) + short_interval = gr.Slider(value=0.05, minimum=0, maximum=0.5, step=0.005, label=i18n("判定为短间隔时长"),interactive=True,visible=True) + max_interval = gr.Slider(value=0.8, minimum=0.1, maximum=10, step=0.1, label=i18n("最大间隔时间"),interactive=True) + max_text_length = gr.Slider(value=50,minimum=5,maximum=200,step=1, label=i18n("最长允许单句长度"),interactive=True) + add_period = gr.Checkbox(label=i18n("句末加句号"),interactive=True, value=True) + merge_button = gr.Button(i18n("合并字幕"), variant="primary") - with gr.Tab(i18n("过滤设置")): - min_length = gr.Slider(value=5, minimum=0, maximum=20, step=1, label=i18n("允许最短长度"),interactive=True) - filter_english = gr.Checkbox(label=i18n("过滤带有英文的"),interactive=True) - filter_words = gr.Textbox("", label=i18n("过滤词语,一行一个"),lines=5,max_lines=10,interactive=True) - filter_button = gr.Button(i18n("过滤字幕"), variant="primary",interactive=False) - with gr.Tab(i18n("切分与保存")): - with gr.Group(): - pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) - post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) - pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) - post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) - with gr.Group(): - language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) - audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) - with gr.Group(): - save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) - character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) - character_warning = gr.Textbox(i18n("注意:该文件夹已存在"), label=i18n("提示"),interactive=False,visible=False) - save_srt_button = gr.Button(i18n("保存合并后字幕"),variant="secondary",interactive=True) - slice_audio_button = gr.Button(i18n("切分并保存音频、list"), variant="primary",interactive=False) + with gr.Tab(i18n("过滤设置")): + min_length = gr.Slider(value=5, minimum=0, maximum=20, step=1, label=i18n("允许最短长度"),interactive=True) + filter_english = gr.Checkbox(label=i18n("过滤带有英文的"),interactive=True) + filter_words = gr.Textbox("", label=i18n("过滤词语,一行一个"),lines=5,max_lines=10,interactive=True) + filter_button = gr.Button(i18n("过滤字幕"), variant="primary",interactive=False) + with gr.Tab(i18n("切分与保存")): + with gr.Group(): + pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) + post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) + pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) + post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) + with gr.Group(): + language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) + audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) + with gr.Group(): + save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) + character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) + character_warning = gr.Textbox(i18n("注意:该文件夹已存在"), label=i18n("提示"),interactive=False,visible=False) + save_srt_button = gr.Button(i18n("保存合并后字幕"),variant="secondary",interactive=True) + slice_audio_button = gr.Button(i18n("切分并保存音频、list"), variant="primary",interactive=False) - with gr.Column(scale=2) as output_col: - with gr.Tabs(): - with gr.Tab(i18n("合并后srt文本")): - output_text = gr.Textbox("", lines=20, max_lines=30, label="Sliced SRT") - with gr.Tab(i18n("切分预览")): - gr.Textbox(i18n("正在建设,敬请期待"), label=i18n("提示"),interactive=False) + with gr.Column(scale=2) as output_col: + with gr.Tabs(): + with gr.Tab(i18n("合并后srt文本")): + output_text = gr.Textbox("", lines=20, max_lines=30, label="Sliced SRT") + with gr.Tab(i18n("切分预览")): + gr.Textbox(i18n("正在建设,敬请期待"), label=i18n("提示"),interactive=False) + with gr.Tab(i18n("List 合并小工具")): + with gr.Row(): + with gr.Column(scale=2): + scan_list_folder = gr.Textbox("output/sliced_audio", label=i18n("文件夹路径"),interactive=True) + scan_list_button = gr.Button(i18n("扫描文件夹"), variant="secondary") + first_list_folder = gr.Dropdown([], label=i18n("主文件夹"),interactive=True) + second_list_folder = gr.Dropdown([], label=i18n("次文件夹"),interactive=True) + merge_list_character_name = gr.Textbox("", label=i18n("角色名称,留空使用主文件夹的"),interactive=True) + merge_list_button = gr.Button(i18n("合并文件夹与List"), variant="primary") + with gr.Column(scale=2): + list_preview = gr.Textbox("", lines=20, max_lines=30, label=i18n("合并后的List")) + + scan_list_button.click(scan_list_folders, [scan_list_folder], [first_list_folder, second_list_folder]) + merge_list_button.click(preview_merged_list, [first_list_folder, second_list_folder, merge_list_character_name, scan_list_folder], [list_preview]) + save_folder.change(lambda x:gr.Textbox(value=x), [save_folder], [scan_list_folder]) + scan_list_folder.change(lambda x:gr.Textbox(value=x), [scan_list_folder], [save_folder]) scan_button.click(get_srt_and_audio_files, [input_folder], [srt_files_list, audio_files_list]) merge_zero_interval.change(lambda x: gr.update(visible=x), [merge_zero_interval],[short_interval]) srt_files_list.change(change_srt_file, [input_folder, srt_files_list], [audio_files_list])