From f1245425fd7b4ffa2f7de5d846fd8257c745d094 Mon Sep 17 00:00:00 2001 From: XTer Date: Wed, 20 Mar 2024 23:43:40 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=B8=80?= =?UTF-8?q?=E4=B8=AASRT=E5=90=88=E5=B9=B6=E5=88=87=E5=88=86=E6=8F=92?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 3 +- tools/i18n/i18n.py | 10 +- tools/i18n/locale_diff.py | 7 +- tools/i18n/scan_i18n.py | 57 ++++- tools/srt_slicer/i18n/locale/en_US.json | 47 ++++ tools/srt_slicer/i18n/locale/zh_CN.json | 47 ++++ tools/srt_slicer/srt_utils.py | 96 ++++++++ tools/srt_slicer/webui.py | 298 ++++++++++++++++++++++++ 8 files changed, 544 insertions(+), 21 deletions(-) create mode 100644 tools/srt_slicer/i18n/locale/en_US.json create mode 100644 tools/srt_slicer/i18n/locale/zh_CN.json create mode 100644 tools/srt_slicer/srt_utils.py create mode 100644 tools/srt_slicer/webui.py diff --git a/requirements.txt b/requirements.txt index 73912d01..0ce7554c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,4 +25,5 @@ jieba_fast jieba LangSegment>=0.2.0 Faster_Whisper -wordsegment \ No newline at end of file +wordsegment +srt \ No newline at end of file diff --git a/tools/i18n/i18n.py b/tools/i18n/i18n.py index 00e91bf3..1861a034 100644 --- a/tools/i18n/i18n.py +++ b/tools/i18n/i18n.py @@ -3,22 +3,22 @@ import locale import os -def load_language_list(language): - with open(f"./i18n/locale/{language}.json", "r", encoding="utf-8") as f: +def load_language_list(language, locale_path="./i18n/locale"): + with open(os.path.join(locale_path, f"{language}.json"), "r", encoding="utf-8") as f: language_list = json.load(f) return language_list class I18nAuto: - def __init__(self, language=None): + def __init__(self, language=None, locale_path="./i18n/locale"): if language in ["Auto", None]: language = locale.getdefaultlocale()[ 0 ] # getlocale can't identify the system's language ((None, None)) - if not os.path.exists(f"./i18n/locale/{language}.json"): + if not os.path.exists(os.path.join(locale_path, f"{language}.json")): language = "en_US" self.language = language - self.language_map = load_language_list(language) + self.language_map = load_language_list(language, locale_path) def __call__(self, key): return self.language_map.get(key, key) diff --git a/tools/i18n/locale_diff.py b/tools/i18n/locale_diff.py index 674f7dd2..e5527be3 100644 --- a/tools/i18n/locale_diff.py +++ b/tools/i18n/locale_diff.py @@ -2,11 +2,12 @@ import json import os from collections import OrderedDict +dir_path = "./i18n/locale" # The path to the i18n locale directory, you can change it to your own path + # Define the standard file name -standard_file = "locale/zh_CN.json" +standard_file = os.path.join(dir_path, "zh_CN.json") # Find all JSON files in the directory -dir_path = "locale/" languages = [ os.path.join(dir_path, f) for f in os.listdir(dir_path) @@ -30,7 +31,7 @@ for lang_file in languages: # Add any missing keys to the language file for key in diff: - lang_data[key] = key + lang_data[key] = standard_data[key] # Del any extra keys to the language file for key in miss: diff --git a/tools/i18n/scan_i18n.py b/tools/i18n/scan_i18n.py index f3e52cf4..0db41df0 100644 --- a/tools/i18n/scan_i18n.py +++ b/tools/i18n/scan_i18n.py @@ -1,7 +1,20 @@ import ast -import glob import json from collections import OrderedDict +import os + +locale_path = "./i18n/locale" # The path to the i18n locale directory, you can change it to your own path +scan_list = ["./", + "GPT_SoVITS/", + "tools/" + ] # The path to the directory you want to scan, you can change it to your own path +scan_subfolders = False # Whether to scan subfolders + + +# 你想要保留的特殊词汇 +special_words_to_keep = { + +} def extract_i18n_strings(node): @@ -21,20 +34,32 @@ def extract_i18n_strings(node): return i18n_strings +strings = [] -# scan the directory for all .py files (recursively) # for each file, parse the code into an AST # for each AST, extract the i18n strings - -strings = [] -for filename in glob.iglob("**/*.py", recursive=True): - with open(filename, "r") as f: +def scan_i18n_strings(filename): + with open(filename, "r", encoding="utf-8") as f: code = f.read() if "I18nAuto" in code: tree = ast.parse(code) i18n_strings = extract_i18n_strings(tree) print(filename, len(i18n_strings)) strings.extend(i18n_strings) + + +# scan the directory for all .py files (recursively) +if scan_subfolders: + for folder in scan_list: + for dirpath, dirnames, filenames in os.walk(folder): + for filename in [f for f in filenames if f.endswith(".py")]: + scan_i18n_strings(os.path.join(dirpath, filename)) +else: + for folder in scan_list: + for filename in os.listdir(folder): + if filename.endswith(".py"): + scan_i18n_strings(os.path.join(folder, filename)) + code_keys = set(strings) """ n_i18n.py @@ -49,11 +74,13 @@ print() print("Total unique:", len(code_keys)) -standard_file = "i18n/locale/zh_CN.json" -with open(standard_file, "r", encoding="utf-8") as f: - standard_data = json.load(f, object_pairs_hook=OrderedDict) -standard_keys = set(standard_data.keys()) - +standard_file = os.path.join(locale_path, "zh_CN.json") +try: + with open(standard_file, "r", encoding="utf-8") as f: + standard_data = json.load(f, object_pairs_hook=OrderedDict) + standard_keys = set(standard_data.keys()) +except FileNotFoundError: + standard_keys = set() # Define the standard file name unused_keys = standard_keys - code_keys print("Unused keys:", len(unused_keys)) @@ -64,12 +91,18 @@ missing_keys = code_keys - standard_keys print("Missing keys:", len(missing_keys)) for missing_key in missing_keys: print("\t", missing_key) + + code_keys_dict = OrderedDict() for s in strings: - code_keys_dict[s] = s + if s in special_words_to_keep: + code_keys_dict[s] = special_words_to_keep[s] + else: + code_keys_dict[s] = s # write back +os.makedirs(locale_path, exist_ok=True) with open(standard_file, "w", encoding="utf-8") as f: json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True) f.write("\n") diff --git a/tools/srt_slicer/i18n/locale/en_US.json b/tools/srt_slicer/i18n/locale/en_US.json new file mode 100644 index 00000000..add77f61 --- /dev/null +++ b/tools/srt_slicer/i18n/locale/en_US.json @@ -0,0 +1,47 @@ +{ + "SRT合并切分插件": "SRT Merge and Split Plugin", + "SRT文件": "SRT File", + "srt文件内容": "SRT File Content", + "上传SRT文件": "Upload SRT File", + "作者: ": "Author: ", + "使用方法": "How to Use", + "保存合并后字幕": "Save Merged Subtitles", + "保存子文件夹名称": "Save Subfolder Name", + "保存文件夹": "Save Folder", + "允许最短长度": "Minimum Allowed Length", + "内容预览": "Content Preview", + "切分与保存": "Split and Save", + "切分并保存音频、list": "Split and Save Audio, List", + "切分预览": "Split Preview", + "判定为短间隔时长": "Judged as Short Interval Duration", + "前置保留时间": "Preceding Retention Time", + "前置添加静音时间": "Prepend Silence Time", + "句末加句号": "Add Period at the End of Sentence", + "合并后srt文本": "Merged SRT Text", + "合并字幕": "Merge Subtitles", + "合并字幕设置": "Subtitle Merge Settings", + "后置保留时间": "Following Retention Time", + "后置添加静音时间": "Append Silence Time", + "扫描文件夹": "Scan Folder", + "找不到字幕!!!": "Subtitles Not Found!!!", + "找不到音频!!!": "Audio Not Found!!!", + "提供SRT文件(可使用剪映或者ASR工具获得)与原始音频文件。": "Provide SRT File (can be obtained via Clip or ASR tools) and Original Audio File.", + "提前合并时间间隔很短的字幕": "Merge Subtitles with Short Intervals in Advance", + "提示": "Tips", + "文件夹路径": "Folder Path", + "最大间隔时间": "Maximum Interval Time", + "最长允许单句长度": "Maximum Allowed Sentence Length", + "根据面板合并短句并过滤你不希望出现的句子。": "Merge short sentences according to the panel and filter out sentences you do not want to appear.", + "正在建设,敬请期待": "Under Construction, Stay Tuned", + "语言": "Language", + "读取文件": "Read File", + "读取本地文件": "Read Local File", + "过滤字幕": "Filter Subtitles", + "过滤带有英文的": "Filter Out English", + "过滤设置": "Filter Settings", + "过滤词语,一行一个": "Filter Words, One Per Line", + "这是一个插件,用于依靠SRT文件得到切分与打标好的音频。": "This is a plugin for obtaining split and tagged audio based on SRT files.", + "随后保存成切分好的音频与list文件。": "Then save as split audio and list files.", + "音频文件": "Audio File", + "音频格式": "Audio Format" + } \ No newline at end of file diff --git a/tools/srt_slicer/i18n/locale/zh_CN.json b/tools/srt_slicer/i18n/locale/zh_CN.json new file mode 100644 index 00000000..7a0a7e4b --- /dev/null +++ b/tools/srt_slicer/i18n/locale/zh_CN.json @@ -0,0 +1,47 @@ +{ + "SRT合并切分插件": "SRT合并切分插件", + "SRT文件": "SRT文件", + "srt文件内容": "srt文件内容", + "上传SRT文件": "上传SRT文件", + "作者: ": "作者: ", + "使用方法": "使用方法", + "保存合并后字幕": "保存合并后字幕", + "保存子文件夹名称": "保存子文件夹名称", + "保存文件夹": "保存文件夹", + "允许最短长度": "允许最短长度", + "内容预览": "内容预览", + "切分与保存": "切分与保存", + "切分并保存音频、list": "切分并保存音频、list", + "切分预览": "切分预览", + "判定为短间隔时长": "判定为短间隔时长", + "前置保留时间": "前置保留时间", + "前置添加静音时间": "前置添加静音时间", + "句末加句号": "句末加句号", + "合并后srt文本": "合并后srt文本", + "合并字幕": "合并字幕", + "合并字幕设置": "合并字幕设置", + "后置保留时间": "后置保留时间", + "后置添加静音时间": "后置添加静音时间", + "扫描文件夹": "扫描文件夹", + "找不到字幕!!!": "找不到字幕!!!", + "找不到音频!!!": "找不到音频!!!", + "提供SRT文件(可使用剪映或者ASR工具获得)与原始音频文件。": "提供SRT文件(可使用剪映或者ASR工具获得)与原始音频文件。", + "提前合并时间间隔很短的字幕": "提前合并时间间隔很短的字幕", + "提示": "提示", + "文件夹路径": "文件夹路径", + "最大间隔时间": "最大间隔时间", + "最长允许单句长度": "最长允许单句长度", + "根据面板合并短句并过滤你不希望出现的句子。": "根据面板合并短句并过滤你不希望出现的句子。", + "正在建设,敬请期待": "正在建设,敬请期待", + "语言": "语言", + "读取文件": "读取文件", + "读取本地文件": "读取本地文件", + "过滤字幕": "过滤字幕", + "过滤带有英文的": "过滤带有英文的", + "过滤设置": "过滤设置", + "过滤词语,一行一个": "过滤词语,一行一个", + "这是一个插件,用于依靠SRT文件得到切分与打标好的音频。": "这是一个插件,用于依靠SRT文件得到切分与打标好的音频。", + "随后保存成切分好的音频与list文件。": "随后保存成切分好的音频与list文件。", + "音频文件": "音频文件", + "音频格式": "音频格式" +} diff --git a/tools/srt_slicer/srt_utils.py b/tools/srt_slicer/srt_utils.py new file mode 100644 index 00000000..6dd226d6 --- /dev/null +++ b/tools/srt_slicer/srt_utils.py @@ -0,0 +1,96 @@ +import srt + +def parse_srt_with_lib(content): + + subtitles = list(srt.parse(content)) + return subtitles + +def generate_srt_with_lib(subtitles): + content = srt.compose(subtitles) + return content + +def merge_subtitles_with_lib(subtitles, short_interval, max_interval, max_text_length=30, add_period=True, merge_zero_interval=True): + # 标点符号 + punctuations = ["。","!", "!", "?", "?", ";", ";", "…"] + punctuations_extanded = punctuations + punctuations_extanded.extend([ ":", ":", ",", ",", "—",]) + + # 直接合并间隔特别短的字幕 + if merge_zero_interval: + eps = short_interval + for i in range(len(subtitles) - 1, 0, -1): + if subtitles[i-1].content[-1] in punctuations_extanded: + continue + if abs(subtitles[i].start.total_seconds() - subtitles[i-1].end.total_seconds()) < eps: + subtitles[i - 1].end = subtitles[i].end + subtitles[i - 1].content += subtitles[i].content + subtitles.pop(i) + + merged_subtitles = [] + current_subtitle = None + for subtitle in subtitles: + if current_subtitle is None: + current_subtitle = subtitle + else: + current_end = current_subtitle.end.total_seconds() + next_start = subtitle.start.total_seconds() + if current_subtitle.content[-1] not in punctuations and (next_start - current_end <= max_interval and count_words_multilang(current_subtitle.content + subtitle.content) < max_text_length): + current_subtitle.end = subtitle.end + comma = ',' if current_subtitle.content[-1] not in punctuations_extanded else '' + current_subtitle.content += comma + subtitle.content + + else: + if add_period and current_subtitle.content[-1] not in punctuations_extanded: + current_subtitle.content += '。' + merged_subtitles.append(current_subtitle) + current_subtitle = subtitle + if current_subtitle is not None: + merged_subtitles.append(current_subtitle) + # 重新分配id,因为srt.compose需要id连续 + for i, subtitle in enumerate(merged_subtitles, start=1): + subtitle.index = i + return merged_subtitles + + + +def count_words_multilang(text): + # 初始化计数器 + word_count = 0 + in_word = False + + for char in text: + if char.isspace(): # 如果当前字符是空格 + in_word = False + elif char.isascii() and not in_word: # 如果是ASCII字符(英文)并且不在单词内 + word_count += 1 # 新的英文单词 + in_word = True + elif not char.isascii(): # 如果字符非英文 + word_count += 1 # 每个非英文字符单独计为一个字 + + return word_count + +import pydub, os + +def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserve_time, post_preserve_time, pre_silence_time, post_silence_time, language='auto', character='character'): + list_file = os.path.join(save_folder, 'datamapping.list') + with open(list_file, 'w', encoding="utf-8") as f: + for i in range(len(subtitles)): + subtitle = subtitles[i] + start = subtitle.start.total_seconds() - pre_preserve_time + end = subtitle.end.total_seconds() + post_preserve_time + if i < len(subtitles) - 1: + next_subtitle = subtitles[i + 1] + end = min(end, 1.0/2*(subtitle.end.total_seconds()+next_subtitle.start.total_seconds())) + if i > 0: + prev_subtitle = subtitles[i - 1] + start = max(start, 1.0/2*(prev_subtitle.end.total_seconds()+subtitle.start.total_seconds())) + try: + audio = pydub.AudioSegment.from_file(audio_path) + sliced_audio = audio[int(start * 1000):int(end * 1000)] + file_name = f'{i + 1:03d}.{format}' + save_path = os.path.join(save_folder, file_name) + sliced_audio.export(save_path, format=format) + f.write(f"{file_name}|{character}|{language}|{subtitle.content}\n") + except Exception as e: + raise e + \ No newline at end of file diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py new file mode 100644 index 00000000..00fcc827 --- /dev/null +++ b/tools/srt_slicer/webui.py @@ -0,0 +1,298 @@ +import gradio as gr + +import sys +sys.path.append('.') +sys.path.append('..') +from srt_utils import merge_subtitles_with_lib, parse_srt_with_lib, generate_srt_with_lib, slice_audio_with_lib, count_words_multilang + + +from i18n.i18n import I18nAuto + +import os + +i18n = I18nAuto(language="en_US", locale_path="./tools/srt_slicer/i18n/locale") + +def merge_srt(input_text, output_text, short_interval=0.1, max_interval=1, max_text_length=30, add_period=True, merge_zero_interval=True): + original_subtitles = parse_srt_with_lib(input_text) + merged_subtitles = merge_subtitles_with_lib(original_subtitles, short_interval, max_interval, max_text_length, add_period, merge_zero_interval) + output_text = generate_srt_with_lib(merged_subtitles) + return output_text + + + +def slice_audio( + input_audio, + save_folder, + audio_format, + output_text, + pre_preserve_time, + post_preserve_time, + pre_silence_time, + post_silence_time, + language, + character, + +): + if isinstance(input_audio, str) and input_audio != "": + pass + else: + gr.Warning(i18n("找不到音频!!!")) + return + if output_text == "": + gr.Warning(i18n("找不到字幕!!!")) + return + + character_folder = os.path.join(save_folder, character) + os.makedirs(character_folder, exist_ok=True) + subtitles = parse_srt_with_lib(output_text) + try: + slice_audio_with_lib( + input_audio, + character_folder, + audio_format, + subtitles, + pre_preserve_time, + post_preserve_time, + pre_silence_time, + post_silence_time, + language, + character, + ) + except Exception as e: + gr.Warning(f"Can't Slice, Error: {e}") + +def get_relative_path(path, base): + return os.path.relpath(path, base) + +def get_srt_and_audio_files(folder): + if not os.path.exists(folder): + os.makedirs(folder, exist_ok=True) + srt_files = [] + audio_files = [] + audio_file_formats = ["mp3", "wav", "ogg", "flac"] + for root, dirs, files in os.walk(folder): + for file in files: + if file.lower().endswith(".srt"): + srt_files.append(get_relative_path(os.path.join(root, file), folder)) + for audio_file_format in audio_file_formats: + if file.lower().endswith(audio_file_format): + audio_files.append(get_relative_path(os.path.join(root, file), folder)) + srt_file = "" + audio_file = "" + if len(srt_files) > 0: + srt_file = srt_files[0] + if len(audio_files) > 0: + audio_file = audio_files[0] + return gr.Dropdown(srt_files,value=srt_file), gr.Dropdown(audio_files,value=audio_file) + +def change_srt_file(folder,srt_file): + srt_folder = os.path.dirname(os.path.join(folder, srt_file)) + basename = os.path.basename(srt_file).rsplit(".", 1)[0] + audio_file_formats = ["mp3", "wav", "ogg", "flac"] + for file in os.listdir(srt_folder): + print(f"basename: {basename}, file: {file}") + if basename.lower() in file.lower(): + for audio_file_format in audio_file_formats: + if file.lower().endswith(audio_file_format): + return gr.Dropdown(value=get_relative_path(os.path.join(srt_folder, file), folder)) + return gr.Dropdown(interactive=True) + +def filter_srt(input_text, min_length, filter_english, filter_words): + subtitles = parse_srt_with_lib(input_text) + filtered_subtitles = [] + for subtitle in subtitles: + if count_words_multilang(subtitle.content) >= min_length: + flag = False + if filter_english: + for i in subtitle.content: + if i in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": + flag = True + break + if not flag and filter_words: + filter_words.replace("\r", "\n") + for word in filter_words.split("\n"): + if word in subtitle.content: + flag = True + break + if not flag: + filtered_subtitles.append(subtitle) + return generate_srt_with_lib(filtered_subtitles) + +def load_srt_from_file(srt_file): + try: + with open(srt_file, "r", encoding="utf-8") as f: + return f.read() + except: + return "" + +def load_audio_from_file(audio_file): + try: + return gr.Audio(audio_file) + except: + return gr.Audio(value=None) + +def load_from_dropdown(input_folder, srt_files_list, audio_files_list): + if isinstance(srt_files_list, str) and isinstance(audio_files_list, str): + srt_file= os.path.join(input_folder, srt_files_list) + audio_file = os.path.join(input_folder, audio_files_list) + return load_srt_from_file(srt_file), load_audio_from_file(audio_file) + else: + return "", gr.Audio(value=None) + +def enable_gr_elements(*args): + return [gr.update(interactive=True) for _ in args] + +def disable_gr_elements(*args): + return [gr.update(interactive=False) for _ in args] + +def save_srt_to_file(srt_text, save_folder, character): + character_folder = os.path.join(save_folder, character) + os.makedirs(character_folder, exist_ok=True) + srt_file = os.path.join(character_folder, "merged.srt") + with open(srt_file, "w", encoding="utf-8") as f: + f.write(srt_text) + +from datetime import datetime + +def change_character_name(input_audio): + try: + input_audio_name = os.path.basename(input_audio).rsplit(".", 1)[0] + character = input_audio_name[:20] + except: + character = datetime.now().strftime("%m%d%H%M") + return gr.Textbox(value=character) + + + +with gr.Blocks() as app: + with gr.Row(): + gr.HTML(f"""

{i18n("SRT合并切分插件")}

+

{i18n("这是一个插件,用于依靠SRT文件得到切分与打标好的音频。")}

{i18n("作者: ")}XTer

+

{i18n("使用方法")}

+
    +
  1. {i18n("提供SRT文件(可使用剪映或者ASR工具获得)与原始音频文件。")}
  2. +
  3. {i18n("根据面板合并短句并过滤你不希望出现的句子。")}
  4. +
  5. {i18n("随后保存成切分好的音频与list文件。")}
  6. +
""") + + + with gr.Row(): + with gr.Column(scale=2) as input_col: + with gr.Tabs(): + with gr.Tab(i18n("读取本地文件")): + input_folder = gr.Textbox("input/srt_and_audios", label=i18n("文件夹路径"),interactive=True) + scan_button = gr.Button(i18n("扫描文件夹"), variant="secondary",interactive=True) + srt_files_list = gr.Dropdown([], label=i18n("SRT文件"),interactive=True) + audio_files_list = gr.Dropdown([], label=i18n("音频文件"),interactive=True) + srt_read_button = gr.Button(i18n("读取文件"), variant="secondary",interactive=True) + with gr.Tab(i18n("上传SRT文件")): + input_srt_file = gr.File(label=i18n("上传SRT文件"), type="filepath", file_types=["srt"]) + # input_audio_file = gr.File(label=i18n("上传音频文件"), type="audio", file_types=["mp3", "wav", "ogg"]) + with gr.Tabs(): + with gr.Tab(i18n("内容预览")): + input_audio = gr.Audio(type="filepath",label=i18n("音频文件")) + input_text = gr.Textbox("", lines=20, max_lines=30, label=i18n("srt文件内容")) + input_srt_file.change(load_srt_from_file, [input_srt_file], [input_text]) + with gr.Column(scale=1) as control_col: + with gr.Tabs(): + with gr.Tab(i18n("合并字幕设置")): + merge_zero_interval = gr.Checkbox(label=i18n("提前合并时间间隔很短的字幕"),interactive=True, value=True) + short_interval = gr.Slider(value=0.05, minimum=0, maximum=0.5, step=0.005, label=i18n("判定为短间隔时长"),interactive=True,visible=True) + max_interval = gr.Slider(value=0.8, minimum=0.1, maximum=10, step=0.1, label=i18n("最大间隔时间"),interactive=True) + max_text_length = gr.Slider(value=50,minimum=5,maximum=200,step=1, label=i18n("最长允许单句长度"),interactive=True) + add_period = gr.Checkbox(label=i18n("句末加句号"),interactive=True, value=True) + merge_button = gr.Button(i18n("合并字幕"), variant="primary") + + with gr.Tab(i18n("过滤设置")): + min_length = gr.Slider(value=5, minimum=0, maximum=20, step=1, label=i18n("允许最短长度"),interactive=True) + filter_english = gr.Checkbox(label=i18n("过滤带有英文的"),interactive=True) + filter_words = gr.Textbox("", label=i18n("过滤词语,一行一个"),lines=5,max_lines=10,interactive=True) + filter_button = gr.Button(i18n("过滤字幕"), variant="primary",interactive=False) + with gr.Tab(i18n("切分与保存")): + pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) + post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) + pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) + post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) + + language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) + audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) + save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) + character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) + save_srt_button = gr.Button(i18n("保存合并后字幕"),variant="secondary",interactive=True) + slice_audio_button = gr.Button(i18n("切分并保存音频、list"), variant="primary",interactive=False) + + with gr.Column(scale=2) as output_col: + with gr.Tabs(): + with gr.Tab(i18n("合并后srt文本")): + output_text = gr.Textbox("", lines=20, max_lines=30, label="Sliced SRT") + with gr.Tab(i18n("切分预览")): + gr.Textbox(i18n("正在建设,敬请期待"), label=i18n("提示"),interactive=False) + scan_button.click(get_srt_and_audio_files, [input_folder], [srt_files_list, audio_files_list]) + merge_zero_interval.change(lambda x: gr.update(visible=x), [merge_zero_interval],[short_interval]) + srt_files_list.change(change_srt_file, [input_folder, srt_files_list], [audio_files_list]) + srt_read_button.click( + load_from_dropdown, + [input_folder, srt_files_list, audio_files_list], + [input_text, input_audio], + ) + input_text.change( + disable_gr_elements, + [slice_audio_button, filter_button], + [slice_audio_button, filter_button], + ).then( + change_character_name, + [input_audio], + [character], + ) + input_audio.change( + change_character_name, + [input_audio], + [character], + ) + + merge_button.click( + merge_srt, + [ + input_text, + output_text, + short_interval, + max_interval, + max_text_length, + add_period, + merge_zero_interval + ], + [output_text], + ).then( + enable_gr_elements, + [slice_audio_button, filter_button], + [slice_audio_button, filter_button], + ) + slice_audio_button.click( + slice_audio, + [ + input_audio, + save_folder, + audio_format, + output_text, + pre_preserve_time, + post_preserve_time, + pre_silence_time, + post_silence_time, + language, + character + ], + + ) + save_srt_button.click( + save_srt_to_file, + [output_text, save_folder, character], + + ) + filter_button.click( + filter_srt, + [output_text, min_length, filter_english, filter_words], + [output_text], + ) + save_srt_button.click + +app.launch(inbrowser=True, server_port=8991, debug=True) From f2544a8aef363ea85ae15550255d0e61480ae3b1 Mon Sep 17 00:00:00 2001 From: XTer Date: Wed, 20 Mar 2024 23:49:41 +0800 Subject: [PATCH 02/10] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E8=AF=AD?= =?UTF-8?q?=E8=A8=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index 00fcc827..b30b2e6a 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -10,7 +10,7 @@ from i18n.i18n import I18nAuto import os -i18n = I18nAuto(language="en_US", locale_path="./tools/srt_slicer/i18n/locale") +i18n = I18nAuto(language=None, locale_path="./tools/srt_slicer/i18n/locale") def merge_srt(input_text, output_text, short_interval=0.1, max_interval=1, max_text_length=30, add_period=True, merge_zero_interval=True): original_subtitles = parse_srt_with_lib(input_text) From 9125424f3a4540a1d95ab9612448b390899ef4be Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 12:01:30 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86=E4=B8=8A?= =?UTF-8?q?=E4=BC=A0=E6=96=87=E4=BB=B6=E7=9A=84=E9=80=BB=E8=BE=91=EF=BC=9B?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=B8=80=E4=B8=AA=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=A4=B9=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/i18n/locale/en_US.json | 4 +- tools/srt_slicer/i18n/locale/zh_CN.json | 2 + tools/srt_slicer/webui.py | 50 ++++++++++++++++--------- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/tools/srt_slicer/i18n/locale/en_US.json b/tools/srt_slicer/i18n/locale/en_US.json index add77f61..b1bf423e 100644 --- a/tools/srt_slicer/i18n/locale/en_US.json +++ b/tools/srt_slicer/i18n/locale/en_US.json @@ -3,6 +3,7 @@ "SRT文件": "SRT File", "srt文件内容": "SRT File Content", "上传SRT文件": "Upload SRT File", + "上传文件": "Upload Files", "作者: ": "Author: ", "使用方法": "How to Use", "保存合并后字幕": "Save Merged Subtitles", @@ -33,6 +34,7 @@ "最长允许单句长度": "Maximum Allowed Sentence Length", "根据面板合并短句并过滤你不希望出现的句子。": "Merge short sentences according to the panel and filter out sentences you do not want to appear.", "正在建设,敬请期待": "Under Construction, Stay Tuned", + "注意:该文件夹已存在": "Warning: The folder already exists", "语言": "Language", "读取文件": "Read File", "读取本地文件": "Read Local File", @@ -44,4 +46,4 @@ "随后保存成切分好的音频与list文件。": "Then save as split audio and list files.", "音频文件": "Audio File", "音频格式": "Audio Format" - } \ No newline at end of file +} diff --git a/tools/srt_slicer/i18n/locale/zh_CN.json b/tools/srt_slicer/i18n/locale/zh_CN.json index 7a0a7e4b..e235eab3 100644 --- a/tools/srt_slicer/i18n/locale/zh_CN.json +++ b/tools/srt_slicer/i18n/locale/zh_CN.json @@ -3,6 +3,7 @@ "SRT文件": "SRT文件", "srt文件内容": "srt文件内容", "上传SRT文件": "上传SRT文件", + "上传文件": "上传文件", "作者: ": "作者: ", "使用方法": "使用方法", "保存合并后字幕": "保存合并后字幕", @@ -33,6 +34,7 @@ "最长允许单句长度": "最长允许单句长度", "根据面板合并短句并过滤你不希望出现的句子。": "根据面板合并短句并过滤你不希望出现的句子。", "正在建设,敬请期待": "正在建设,敬请期待", + "注意:该文件夹已存在": "注意:该文件夹已存在", "语言": "语言", "读取文件": "读取文件", "读取本地文件": "读取本地文件", diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index b30b2e6a..436a0484 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -3,7 +3,7 @@ import gradio as gr import sys sys.path.append('.') sys.path.append('..') -from srt_utils import merge_subtitles_with_lib, parse_srt_with_lib, generate_srt_with_lib, slice_audio_with_lib, count_words_multilang +from tools.srt_slicer.srt_utils import merge_subtitles_with_lib, parse_srt_with_lib, generate_srt_with_lib, slice_audio_with_lib, count_words_multilang from i18n.i18n import I18nAuto @@ -162,7 +162,11 @@ def change_character_name(input_audio): character = datetime.now().strftime("%m%d%H%M") return gr.Textbox(value=character) - +def check_character_foldfer(folder, character): + character_folder = os.path.join(folder, character) + if os.path.exists(character_folder): + return gr.Textbox(visible=True) + return gr.Textbox(visible=False) with gr.Blocks() as app: with gr.Row(): @@ -185,12 +189,13 @@ with gr.Blocks() as app: srt_files_list = gr.Dropdown([], label=i18n("SRT文件"),interactive=True) audio_files_list = gr.Dropdown([], label=i18n("音频文件"),interactive=True) srt_read_button = gr.Button(i18n("读取文件"), variant="secondary",interactive=True) - with gr.Tab(i18n("上传SRT文件")): + with gr.Tab(i18n("上传文件")): input_srt_file = gr.File(label=i18n("上传SRT文件"), type="filepath", file_types=["srt"]) + upload_audio = gr.Audio(type="filepath",label=i18n("音频文件")) # input_audio_file = gr.File(label=i18n("上传音频文件"), type="audio", file_types=["mp3", "wav", "ogg"]) with gr.Tabs(): with gr.Tab(i18n("内容预览")): - input_audio = gr.Audio(type="filepath",label=i18n("音频文件")) + input_audio = gr.Textbox("", label=i18n("音频文件"),interactive=False) input_text = gr.Textbox("", lines=20, max_lines=30, label=i18n("srt文件内容")) input_srt_file.change(load_srt_from_file, [input_srt_file], [input_text]) with gr.Column(scale=1) as control_col: @@ -209,15 +214,18 @@ with gr.Blocks() as app: filter_words = gr.Textbox("", label=i18n("过滤词语,一行一个"),lines=5,max_lines=10,interactive=True) filter_button = gr.Button(i18n("过滤字幕"), variant="primary",interactive=False) with gr.Tab(i18n("切分与保存")): - pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) - post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) - pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) - post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) - - language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) - audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) - save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) - character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) + with gr.Group(): + pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) + post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) + pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) + post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) + with gr.Group(): + language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) + audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) + with gr.Group(): + save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) + character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) + character_warning = gr.Textbox(i18n("注意:该文件夹已存在"), label=i18n("提示"),interactive=False,visible=False) save_srt_button = gr.Button(i18n("保存合并后字幕"),variant="secondary",interactive=True) slice_audio_button = gr.Button(i18n("切分并保存音频、list"), variant="primary",interactive=False) @@ -244,10 +252,15 @@ with gr.Blocks() as app: [input_audio], [character], ) - input_audio.change( + + upload_audio.change( change_character_name, - [input_audio], + [upload_audio], [character], + ).then( + lambda x:gr.Textbox(value=x), + [upload_audio], + [input_audio], ) merge_button.click( @@ -293,6 +306,9 @@ with gr.Blocks() as app: [output_text, min_length, filter_english, filter_words], [output_text], ) - save_srt_button.click - + character.change( + check_character_foldfer, + [save_folder, character], + [character_warning], + ) app.launch(inbrowser=True, server_port=8991, debug=True) From 28d3a5bfa569aed3186da31355034a7430ce9cc5 Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 15:05:26 +0800 Subject: [PATCH 04/10] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E4=BA=86=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=BA=86List=E5=90=88?= =?UTF-8?q?=E5=B9=B6=E5=B0=8F=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/srt_utils.py | 42 +++++++- tools/srt_slicer/webui.py | 181 ++++++++++++++++++++++------------ 2 files changed, 160 insertions(+), 63 deletions(-) diff --git a/tools/srt_slicer/srt_utils.py b/tools/srt_slicer/srt_utils.py index 6dd226d6..1f030d23 100644 --- a/tools/srt_slicer/srt_utils.py +++ b/tools/srt_slicer/srt_utils.py @@ -1,4 +1,5 @@ import srt +import shutil def parse_srt_with_lib(content): @@ -87,10 +88,47 @@ def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserv try: audio = pydub.AudioSegment.from_file(audio_path) sliced_audio = audio[int(start * 1000):int(end * 1000)] - file_name = f'{i + 1:03d}.{format}' + file_name = f'{character}_{i + 1:03d}.{format}' save_path = os.path.join(save_folder, file_name) sliced_audio.export(save_path, format=format) f.write(f"{file_name}|{character}|{language}|{subtitle.content}\n") except Exception as e: raise e - \ No newline at end of file + +def merge_list_folders(first_list_file, second_list_file, character, first_folder, second_folder): + merged_lines = [] + character1 = "" + filenames = set() + with open(first_list_file, 'r', encoding="utf-8") as f: + first_list = f.readlines() + for line in first_list: + filename, character1, language, content = line.split('|') + filenames.add(filename) + if character=="" or character is None: + character = character1 + new_line = f"{filename}|{character}|{language}|{content}" + merged_lines.append(new_line) + with open(second_list_file, 'r', encoding="utf-8") as f: + second_list = f.readlines() + for line in second_list: + filename, _, language, content = line.split('|') + orig_filename = filename + num = 1 + while filename in filenames: + filename = f"{filename.rsplit('.', 1)[0]}_{num}.{filename.rsplit('.', 1)[1]}" + num += 1 + try: + os.rename(os.path.join(second_folder, orig_filename), os.path.join(first_folder, filename)) + except Exception as e: + raise e + new_line = f"{filename}|{character}|{language}|{content}" + merged_lines.append(new_line) + os.remove(second_list_file) + if not os.listdir(second_folder): + os.rmdir(second_folder) + with open(first_list_file, 'w', encoding="utf-8") as f: + f.writelines(merged_lines) + return "\n".join(merged_lines) + + + \ No newline at end of file diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index 436a0484..d037593b 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -1,9 +1,18 @@ import gradio as gr import sys +import os +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) sys.path.append('.') -sys.path.append('..') -from tools.srt_slicer.srt_utils import merge_subtitles_with_lib, parse_srt_with_lib, generate_srt_with_lib, slice_audio_with_lib, count_words_multilang +from srt_utils import ( + merge_subtitles_with_lib, + parse_srt_with_lib, + generate_srt_with_lib, + slice_audio_with_lib, + count_words_multilang, + merge_list_folders +) from i18n.i18n import I18nAuto @@ -19,7 +28,6 @@ def merge_srt(input_text, output_text, short_interval=0.1, max_interval=1, max_t return output_text - def slice_audio( input_audio, save_folder, @@ -46,6 +54,7 @@ def slice_audio( os.makedirs(character_folder, exist_ok=True) subtitles = parse_srt_with_lib(output_text) try: + gr.Info(f"{i18n('正在切分音频')} {input_audio} {i18n('到')} {character_folder}") slice_audio_with_lib( input_audio, character_folder, @@ -58,9 +67,10 @@ def slice_audio( language, character, ) + gr.Info(f"{i18n('切分完成')} ") except Exception as e: gr.Warning(f"Can't Slice, Error: {e}") - + def get_relative_path(path, base): return os.path.relpath(path, base) @@ -124,13 +134,13 @@ def load_srt_from_file(srt_file): return f.read() except: return "" - + def load_audio_from_file(audio_file): try: return gr.Audio(audio_file) except: return gr.Audio(value=None) - + def load_from_dropdown(input_folder, srt_files_list, audio_files_list): if isinstance(srt_files_list, str) and isinstance(audio_files_list, str): srt_file= os.path.join(input_folder, srt_files_list) @@ -151,7 +161,40 @@ def save_srt_to_file(srt_text, save_folder, character): srt_file = os.path.join(character_folder, "merged.srt") with open(srt_file, "w", encoding="utf-8") as f: f.write(srt_text) + +def scan_list_folders(folder): + if not os.path.exists(folder): + os.makedirs(folder, exist_ok=True) + list_folders = [] + for list_folder in os.listdir(folder): + if os.path.isdir(os.path.join(folder, list_folder)): + list_folders.append(get_relative_path(os.path.join(folder, list_folder), folder)) + first_list_folder = "" + second_list_folder = "" + if len(list_folders) > 0: + first_list_folder = second_list_folder = list_folders[0] + if len(list_folders) > 1: + second_list_folder = list_folders[1] + return gr.Dropdown(list_folders, value=first_list_folder), gr.Dropdown(list_folders, value=second_list_folder) + +def preview_merged_list(first_list_folder, second_list_folder, merge_list_character_name, save_folder): + if first_list_folder == "" or second_list_folder == "": + return "" + if first_list_folder == second_list_folder: + gr.Warning(i18n("两个文件夹不能相同!!!")) + return "" + first_list_folder = os.path.join(save_folder, first_list_folder) + second_list_folder = os.path.join(save_folder, second_list_folder) + print(f"first_list_folder: {first_list_folder}, second_list_folder: {second_list_folder}") + first_list = os.path.join(first_list_folder, [file for file in os.listdir(first_list_folder) if file.lower().endswith(".list")][0]) + second_list = os.path.join(second_list_folder, [file for file in os.listdir(second_list_folder) if file.lower().endswith(".list")][0]) + try: + return merge_list_folders(first_list, second_list, merge_list_character_name, first_list_folder, second_list_folder) + except Exception as e: + gr.Warning(f"Can't Merge, Error: {e}") + return "" + from datetime import datetime def change_character_name(input_audio): @@ -178,63 +221,79 @@ with gr.Blocks() as app:
  • {i18n("根据面板合并短句并过滤你不希望出现的句子。")}
  • {i18n("随后保存成切分好的音频与list文件。")}
  • """) - - - with gr.Row(): - with gr.Column(scale=2) as input_col: - with gr.Tabs(): - with gr.Tab(i18n("读取本地文件")): - input_folder = gr.Textbox("input/srt_and_audios", label=i18n("文件夹路径"),interactive=True) - scan_button = gr.Button(i18n("扫描文件夹"), variant="secondary",interactive=True) - srt_files_list = gr.Dropdown([], label=i18n("SRT文件"),interactive=True) - audio_files_list = gr.Dropdown([], label=i18n("音频文件"),interactive=True) - srt_read_button = gr.Button(i18n("读取文件"), variant="secondary",interactive=True) - with gr.Tab(i18n("上传文件")): - input_srt_file = gr.File(label=i18n("上传SRT文件"), type="filepath", file_types=["srt"]) - upload_audio = gr.Audio(type="filepath",label=i18n("音频文件")) - # input_audio_file = gr.File(label=i18n("上传音频文件"), type="audio", file_types=["mp3", "wav", "ogg"]) - with gr.Tabs(): - with gr.Tab(i18n("内容预览")): - input_audio = gr.Textbox("", label=i18n("音频文件"),interactive=False) - input_text = gr.Textbox("", lines=20, max_lines=30, label=i18n("srt文件内容")) - input_srt_file.change(load_srt_from_file, [input_srt_file], [input_text]) - with gr.Column(scale=1) as control_col: - with gr.Tabs(): - with gr.Tab(i18n("合并字幕设置")): - merge_zero_interval = gr.Checkbox(label=i18n("提前合并时间间隔很短的字幕"),interactive=True, value=True) - short_interval = gr.Slider(value=0.05, minimum=0, maximum=0.5, step=0.005, label=i18n("判定为短间隔时长"),interactive=True,visible=True) - max_interval = gr.Slider(value=0.8, minimum=0.1, maximum=10, step=0.1, label=i18n("最大间隔时间"),interactive=True) - max_text_length = gr.Slider(value=50,minimum=5,maximum=200,step=1, label=i18n("最长允许单句长度"),interactive=True) - add_period = gr.Checkbox(label=i18n("句末加句号"),interactive=True, value=True) - merge_button = gr.Button(i18n("合并字幕"), variant="primary") + with gr.Tabs(): + with gr.Tab(i18n("SRT编辑界面")): + with gr.Row(): + with gr.Column(scale=2) as input_col: + with gr.Tabs(): + with gr.Tab(i18n("读取本地文件")): + input_folder = gr.Textbox("input/srt_and_audios", label=i18n("文件夹路径"),interactive=True) + scan_button = gr.Button(i18n("扫描文件夹"), variant="secondary",interactive=True) + srt_files_list = gr.Dropdown([], label=i18n("SRT文件"),interactive=True) + audio_files_list = gr.Dropdown([], label=i18n("音频文件"),interactive=True) + srt_read_button = gr.Button(i18n("读取文件"), variant="secondary",interactive=True) + with gr.Tab(i18n("上传文件")): + input_srt_file = gr.File(label=i18n("上传SRT文件"), type="filepath", file_types=["srt"]) + upload_audio = gr.Audio(type="filepath",label=i18n("音频文件")) + # input_audio_file = gr.File(label=i18n("上传音频文件"), type="audio", file_types=["mp3", "wav", "ogg"]) + with gr.Tabs(): + with gr.Tab(i18n("内容预览")): + input_audio = gr.Textbox("", label=i18n("音频文件"),interactive=False) + input_text = gr.Textbox("", lines=20, max_lines=30, label=i18n("srt文件内容")) + input_srt_file.change(load_srt_from_file, [input_srt_file], [input_text]) + with gr.Column(scale=1) as control_col: + with gr.Tabs(): + with gr.Tab(i18n("合并字幕设置")): + merge_zero_interval = gr.Checkbox(label=i18n("提前合并时间间隔很短的字幕"),interactive=True, value=True) + short_interval = gr.Slider(value=0.05, minimum=0, maximum=0.5, step=0.005, label=i18n("判定为短间隔时长"),interactive=True,visible=True) + max_interval = gr.Slider(value=0.8, minimum=0.1, maximum=10, step=0.1, label=i18n("最大间隔时间"),interactive=True) + max_text_length = gr.Slider(value=50,minimum=5,maximum=200,step=1, label=i18n("最长允许单句长度"),interactive=True) + add_period = gr.Checkbox(label=i18n("句末加句号"),interactive=True, value=True) + merge_button = gr.Button(i18n("合并字幕"), variant="primary") - with gr.Tab(i18n("过滤设置")): - min_length = gr.Slider(value=5, minimum=0, maximum=20, step=1, label=i18n("允许最短长度"),interactive=True) - filter_english = gr.Checkbox(label=i18n("过滤带有英文的"),interactive=True) - filter_words = gr.Textbox("", label=i18n("过滤词语,一行一个"),lines=5,max_lines=10,interactive=True) - filter_button = gr.Button(i18n("过滤字幕"), variant="primary",interactive=False) - with gr.Tab(i18n("切分与保存")): - with gr.Group(): - pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) - post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) - pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) - post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) - with gr.Group(): - language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) - audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) - with gr.Group(): - save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) - character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) - character_warning = gr.Textbox(i18n("注意:该文件夹已存在"), label=i18n("提示"),interactive=False,visible=False) - save_srt_button = gr.Button(i18n("保存合并后字幕"),variant="secondary",interactive=True) - slice_audio_button = gr.Button(i18n("切分并保存音频、list"), variant="primary",interactive=False) + with gr.Tab(i18n("过滤设置")): + min_length = gr.Slider(value=5, minimum=0, maximum=20, step=1, label=i18n("允许最短长度"),interactive=True) + filter_english = gr.Checkbox(label=i18n("过滤带有英文的"),interactive=True) + filter_words = gr.Textbox("", label=i18n("过滤词语,一行一个"),lines=5,max_lines=10,interactive=True) + filter_button = gr.Button(i18n("过滤字幕"), variant="primary",interactive=False) + with gr.Tab(i18n("切分与保存")): + with gr.Group(): + pre_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("前置保留时间"),interactive=True) + post_preserve_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置保留时间"),interactive=True) + pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) + post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) + with gr.Group(): + language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) + audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) + with gr.Group(): + save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) + character = gr.Textbox("character", label=i18n("保存子文件夹名称"),interactive=True) + character_warning = gr.Textbox(i18n("注意:该文件夹已存在"), label=i18n("提示"),interactive=False,visible=False) + save_srt_button = gr.Button(i18n("保存合并后字幕"),variant="secondary",interactive=True) + slice_audio_button = gr.Button(i18n("切分并保存音频、list"), variant="primary",interactive=False) - with gr.Column(scale=2) as output_col: - with gr.Tabs(): - with gr.Tab(i18n("合并后srt文本")): - output_text = gr.Textbox("", lines=20, max_lines=30, label="Sliced SRT") - with gr.Tab(i18n("切分预览")): - gr.Textbox(i18n("正在建设,敬请期待"), label=i18n("提示"),interactive=False) + with gr.Column(scale=2) as output_col: + with gr.Tabs(): + with gr.Tab(i18n("合并后srt文本")): + output_text = gr.Textbox("", lines=20, max_lines=30, label="Sliced SRT") + with gr.Tab(i18n("切分预览")): + gr.Textbox(i18n("正在建设,敬请期待"), label=i18n("提示"),interactive=False) + with gr.Tab(i18n("List 合并小工具")): + with gr.Row(): + with gr.Column(scale=2): + scan_list_folder = gr.Textbox("output/sliced_audio", label=i18n("文件夹路径"),interactive=True) + scan_list_button = gr.Button(i18n("扫描文件夹"), variant="secondary") + first_list_folder = gr.Dropdown([], label=i18n("主文件夹"),interactive=True) + second_list_folder = gr.Dropdown([], label=i18n("次文件夹"),interactive=True) + merge_list_character_name = gr.Textbox("", label=i18n("角色名称,留空使用主文件夹的"),interactive=True) + merge_list_button = gr.Button(i18n("合并文件夹与List"), variant="primary") + with gr.Column(scale=2): + list_preview = gr.Textbox("", lines=20, max_lines=30, label=i18n("合并后的List")) + + scan_list_button.click(scan_list_folders, [scan_list_folder], [first_list_folder, second_list_folder]) + merge_list_button.click(preview_merged_list, [first_list_folder, second_list_folder, merge_list_character_name, scan_list_folder], [list_preview]) + save_folder.change(lambda x:gr.Textbox(value=x), [save_folder], [scan_list_folder]) + scan_list_folder.change(lambda x:gr.Textbox(value=x), [scan_list_folder], [save_folder]) scan_button.click(get_srt_and_audio_files, [input_folder], [srt_files_list, audio_files_list]) merge_zero_interval.change(lambda x: gr.update(visible=x), [merge_zero_interval],[short_interval]) srt_files_list.change(change_srt_file, [input_folder, srt_files_list], [audio_files_list]) From ccf832c4d8797dd5ac240f10f9cee1499b97bf3c Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 15:23:15 +0800 Subject: [PATCH 05/10] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86=E8=AF=AD?= =?UTF-8?q?=E8=A8=80=E7=BF=BB=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/i18n/locale/en_US.json | 11 +++++++++++ tools/srt_slicer/i18n/locale/zh_CN.json | 11 +++++++++++ tools/srt_slicer/webui.py | 1 + 3 files changed, 23 insertions(+) diff --git a/tools/srt_slicer/i18n/locale/en_US.json b/tools/srt_slicer/i18n/locale/en_US.json index b1bf423e..415d36d2 100644 --- a/tools/srt_slicer/i18n/locale/en_US.json +++ b/tools/srt_slicer/i18n/locale/en_US.json @@ -1,9 +1,13 @@ { + "List 合并小工具": "List Merge Tool", "SRT合并切分插件": "SRT Merge and Split Plugin", "SRT文件": "SRT File", + "SRT编辑界面": "SRT Edit Interface", "srt文件内容": "SRT File Content", "上传SRT文件": "Upload SRT File", "上传文件": "Upload Files", + "两个文件夹不能相同!!!": "The two folders cannot be the same!!!", + "主文件夹": "Main Folder", "作者: ": "Author: ", "使用方法": "How to Use", "保存合并后字幕": "Save Merged Subtitles", @@ -12,15 +16,19 @@ "允许最短长度": "Minimum Allowed Length", "内容预览": "Content Preview", "切分与保存": "Split and Save", + "切分完成": "Split Completed", "切分并保存音频、list": "Split and Save Audio, List", "切分预览": "Split Preview", "判定为短间隔时长": "Judged as Short Interval Duration", + "到": " to ", "前置保留时间": "Preceding Retention Time", "前置添加静音时间": "Prepend Silence Time", "句末加句号": "Add Period at the End of Sentence", "合并后srt文本": "Merged SRT Text", + "合并后的List": "Merged List", "合并字幕": "Merge Subtitles", "合并字幕设置": "Subtitle Merge Settings", + "合并文件夹与List": "Merge Folder and List", "后置保留时间": "Following Retention Time", "后置添加静音时间": "Append Silence Time", "扫描文件夹": "Scan Folder", @@ -33,8 +41,11 @@ "最大间隔时间": "Maximum Interval Time", "最长允许单句长度": "Maximum Allowed Sentence Length", "根据面板合并短句并过滤你不希望出现的句子。": "Merge short sentences according to the panel and filter out sentences you do not want to appear.", + "次文件夹": "Second Folder", + "正在切分音频": "Splitting Audio", "正在建设,敬请期待": "Under Construction, Stay Tuned", "注意:该文件夹已存在": "Warning: The folder already exists", + "角色名称,留空使用主文件夹的": "Role Name, Leave Blank to Use Main Folder's", "语言": "Language", "读取文件": "Read File", "读取本地文件": "Read Local File", diff --git a/tools/srt_slicer/i18n/locale/zh_CN.json b/tools/srt_slicer/i18n/locale/zh_CN.json index e235eab3..27567f00 100644 --- a/tools/srt_slicer/i18n/locale/zh_CN.json +++ b/tools/srt_slicer/i18n/locale/zh_CN.json @@ -1,9 +1,13 @@ { + "List 合并小工具": "List 合并小工具", "SRT合并切分插件": "SRT合并切分插件", "SRT文件": "SRT文件", + "SRT编辑界面": "SRT编辑界面", "srt文件内容": "srt文件内容", "上传SRT文件": "上传SRT文件", "上传文件": "上传文件", + "两个文件夹不能相同!!!": "两个文件夹不能相同!!!", + "主文件夹": "主文件夹", "作者: ": "作者: ", "使用方法": "使用方法", "保存合并后字幕": "保存合并后字幕", @@ -12,15 +16,19 @@ "允许最短长度": "允许最短长度", "内容预览": "内容预览", "切分与保存": "切分与保存", + "切分完成": "切分完成", "切分并保存音频、list": "切分并保存音频、list", "切分预览": "切分预览", "判定为短间隔时长": "判定为短间隔时长", + "到": "到", "前置保留时间": "前置保留时间", "前置添加静音时间": "前置添加静音时间", "句末加句号": "句末加句号", "合并后srt文本": "合并后srt文本", + "合并后的List": "合并后的List", "合并字幕": "合并字幕", "合并字幕设置": "合并字幕设置", + "合并文件夹与List": "合并文件夹与List", "后置保留时间": "后置保留时间", "后置添加静音时间": "后置添加静音时间", "扫描文件夹": "扫描文件夹", @@ -33,8 +41,11 @@ "最大间隔时间": "最大间隔时间", "最长允许单句长度": "最长允许单句长度", "根据面板合并短句并过滤你不希望出现的句子。": "根据面板合并短句并过滤你不希望出现的句子。", + "次文件夹": "次文件夹", + "正在切分音频": "正在切分音频", "正在建设,敬请期待": "正在建设,敬请期待", "注意:该文件夹已存在": "注意:该文件夹已存在", + "角色名称,留空使用主文件夹的": "角色名称,留空使用主文件夹的", "语言": "语言", "读取文件": "读取文件", "读取本地文件": "读取本地文件", diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index d037593b..bd586a6d 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -4,6 +4,7 @@ import sys import os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) +sys.path.append(os.path.abspath(os.path.dirname(__file__))) sys.path.append('.') from srt_utils import ( merge_subtitles_with_lib, From 3d65166b35167a9b260169f537df9712ab900021 Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 19:52:27 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=BA=86webui=EF=BC=8C?= =?UTF-8?q?=E6=8F=90=E4=BE=9B=E4=BA=86=E5=8F=A6=E4=B8=80=E7=A7=8D=E9=A3=8E?= =?UTF-8?q?=E6=A0=BC=EF=BC=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 + tools/srt_slicer/webui.py | 7 +- webui.py | 398 ++++++++++++++++++++++---------------- 3 files changed, 235 insertions(+), 172 deletions(-) diff --git a/config.py b/config.py index 1f741285..a989f098 100644 --- a/config.py +++ b/config.py @@ -26,6 +26,7 @@ webui_port_main = 9874 webui_port_uvr5 = 9873 webui_port_infer_tts = 9872 webui_port_subfix = 9871 +webui_port_srt_slicer = 9870 api_port = 9880 @@ -62,5 +63,6 @@ class Config: self.webui_port_uvr5 = webui_port_uvr5 self.webui_port_infer_tts = webui_port_infer_tts self.webui_port_subfix = webui_port_subfix + self.webui_port_srt_slicer = webui_port_srt_slicer self.api_port = api_port diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index bd586a6d..938248f8 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -15,6 +15,11 @@ from srt_utils import ( merge_list_folders ) +port = 8991 + +if len(sys.argv) > 1: + port = int(sys.argv[1]) + from i18n.i18n import I18nAuto @@ -371,4 +376,4 @@ with gr.Blocks() as app: [save_folder, character], [character_warning], ) -app.launch(inbrowser=True, server_port=8991, debug=True) +app.launch(inbrowser=True, server_port=port, debug=True) diff --git a/webui.py b/webui.py index e1c36e1e..bf476b36 100644 --- a/webui.py +++ b/webui.py @@ -48,7 +48,7 @@ import pdb import gradio as gr from subprocess import Popen import signal -from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share +from config import python_exec,infer_device,is_half,exp_root,webui_port_main,webui_port_infer_tts,webui_port_uvr5,webui_port_subfix,is_share,webui_port_srt_slicer from tools.i18n.i18n import I18nAuto i18n = I18nAuto() from scipy.io import wavfile @@ -120,6 +120,7 @@ p_uvr5=None p_asr=None p_denoise=None p_tts_inference=None +p_srt_slicer=None def kill_proc_tree(pid, including_parent=True): try: @@ -162,6 +163,18 @@ def change_label(if_label,path_list): p_label=None yield i18n("打标工具WebUI已关闭") +def change_srt_slicer(if_srt_slicer): + global p_srt_slicer + if(if_srt_slicer==True and p_srt_slicer==None): + cmd = '"%s" tools/srt_slicer/webui.py %s'%(python_exec,webui_port_srt_slicer) + yield i18n("SRT切割工具WebUI已开启") + print(cmd) + p_srt_slicer = Popen(cmd, shell=True) + elif(if_srt_slicer==False and p_srt_slicer!=None): + kill_process(p_srt_slicer.pid) + p_srt_slicer=None + yield i18n("SRT切割工具WebUI已关闭") + def change_uvr5(if_uvr5): global p_uvr5 if(if_uvr5==True and p_uvr5==None): @@ -680,143 +693,168 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: i18n("中文教程文档:https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e") ) + # 重构前端WebUI的展示方式,by XTer: https://github.com/X-T-E-R with gr.Tabs(): - with gr.TabItem(i18n("0-前置数据集获取工具")):#提前随机切片防止uvr5爆内存->uvr5->slicer->asr->打标 - gr.Markdown(value=i18n("0a-UVR5人声伴奏分离&去混响去延迟工具")) + with gr.Tab(i18n("0-前置数据集获取工具")):#提前随机切片防止uvr5爆内存->uvr5->slicer->asr->打标 with gr.Row(): - if_uvr5 = gr.Checkbox(label=i18n("是否开启UVR5-WebUI"),show_label=True) - uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息")) - gr.Markdown(value=i18n("0b-语音切分工具")) - with gr.Row(): - with gr.Row(): - slice_inp_path=gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"),value="") - slice_opt_root=gr.Textbox(label=i18n("切分后的子音频的输出根目录"),value="output/slicer_opt") - threshold=gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"),value="-34") - min_length=gr.Textbox(label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"),value="4000") - min_interval=gr.Textbox(label=i18n("min_interval:最短切割间隔"),value="300") - hop_size=gr.Textbox(label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"),value="10") - max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="500") - with gr.Row(): - open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True) - close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False) - _max=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("max:归一化后最大值多少"),value=0.9,interactive=True) - alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True) - n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) - slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息")) - gr.Markdown(value=i18n("0bb-语音降噪工具")) - with gr.Row(): - open_denoise_button = gr.Button(i18n("开启语音降噪"), variant="primary",visible=True) - close_denoise_button = gr.Button(i18n("终止语音降噪进程"), variant="primary",visible=False) - denoise_input_dir=gr.Textbox(label=i18n("降噪音频文件输入文件夹"),value="") - denoise_output_dir=gr.Textbox(label=i18n("降噪结果输出文件夹"),value="output/denoise_opt") - denoise_info = gr.Textbox(label=i18n("语音降噪进程输出信息")) - gr.Markdown(value=i18n("0c-中文批量离线ASR工具")) - with gr.Row(): - open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True) - close_asr_button = gr.Button(i18n("终止ASR进程"), variant="primary",visible=False) - with gr.Column(): - with gr.Row(): - asr_inp_dir = gr.Textbox( - label=i18n("输入文件夹路径"), - value="D:\\GPT-SoVITS\\raw\\xxx", - interactive=True, - ) - asr_opt_dir = gr.Textbox( - label = i18n("输出文件夹路径"), - value = "output/asr_opt", - interactive = True, - ) - with gr.Row(): - asr_model = gr.Dropdown( - label = i18n("ASR 模型"), - choices = list(asr_dict.keys()), - interactive = True, - value="达摩 ASR (中文)" - ) - asr_size = gr.Dropdown( - label = i18n("ASR 模型尺寸"), - choices = ["large"], - interactive = True, - value="large" - ) - asr_lang = gr.Dropdown( - label = i18n("ASR 语言设置"), - choices = ["zh"], - interactive = True, - value="zh" - ) - with gr.Row(): - asr_info = gr.Textbox(label=i18n("ASR进程输出信息")) + with gr.Column(scale=2): + with gr.Tabs(): + with gr.Tab(i18n("0a-UVR5人声伴奏分离&去混响去延迟工具")): + if_uvr5 = gr.Checkbox(label=i18n("是否开启UVR5-WebUI"),show_label=True) + uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息")) + with gr.Tab(i18n("0b-语音切分工具")): + with gr.Row(): + slice_inp_path=gr.Textbox(label=i18n("音频自动切分输入路径,可文件可文件夹"),value="") + slice_opt_root=gr.Textbox(label=i18n("切分后的子音频的输出根目录"),value="output/slicer_opt") + threshold=gr.Textbox(label=i18n("threshold:音量小于这个值视作静音的备选切割点"),value="-34") + min_length=gr.Textbox(label=i18n("min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值"),value="4000") + min_interval=gr.Textbox(label=i18n("min_interval:最短切割间隔"),value="300") + hop_size=gr.Textbox(label=i18n("hop_size:怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)"),value="10") + max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="500") + with gr.Row(): + _max=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("max:归一化后最大值多少"),value=0.9,interactive=True) + alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True) + n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) + with gr.Row(): + with gr.Group(): + slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息")) + open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True) + close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False) + with gr.Tab(i18n("0bb-语音降噪工具")): + with gr.Row(): + + denoise_input_dir=gr.Textbox(label=i18n("降噪音频文件输入文件夹"),value="") + denoise_output_dir=gr.Textbox(label=i18n("降噪结果输出文件夹"),value="output/denoise_opt") + with gr.Row(): + with gr.Group(): + denoise_info = gr.Textbox(label=i18n("语音降噪进程输出信息")) + open_denoise_button = gr.Button(i18n("开启语音降噪"), variant="primary",visible=True) + close_denoise_button = gr.Button(i18n("终止语音降噪进程"), variant="primary",visible=False) + with gr.Column(scale=2): + with gr.Tabs(): + with gr.Tab(i18n("0c-离线批量ASR工具")): + + with gr.Column(): + with gr.Row(): + asr_inp_dir = gr.Textbox( + label=i18n("输入文件夹路径"), + value="D:\\GPT-SoVITS\\raw\\xxx", + interactive=True, + ) + asr_opt_dir = gr.Textbox( + label = i18n("输出文件夹路径"), + value = "output/asr_opt", + interactive = True, + ) + with gr.Row(): + asr_model = gr.Dropdown( + label = i18n("ASR 模型"), + choices = list(asr_dict.keys()), + interactive = True, + value="达摩 ASR (中文)" + ) + asr_size = gr.Dropdown( + label = i18n("ASR 模型尺寸"), + choices = ["large"], + interactive = True, + value="large" + ) + asr_lang = gr.Dropdown( + label = i18n("ASR 语言设置"), + choices = ["zh"], + interactive = True, + value="zh" + ) + with gr.Row(): + with gr.Group(): + asr_info = gr.Textbox(label=i18n("ASR进程输出信息")) + open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True) + close_asr_button = gr.Button(i18n("终止ASR进程"), variant="primary",visible=False) - def change_lang_choices(key): #根据选择的模型修改可选的语言 - # return gr.Dropdown(choices=asr_dict[key]['lang']) - return {"__type__": "update", "choices": asr_dict[key]['lang'],"value":asr_dict[key]['lang'][0]} - def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 - # return gr.Dropdown(choices=asr_dict[key]['size']) - return {"__type__": "update", "choices": asr_dict[key]['size']} - asr_model.change(change_lang_choices, [asr_model], [asr_lang]) - asr_model.change(change_size_choices, [asr_model], [asr_size]) - - gr.Markdown(value=i18n("0d-语音文本校对标注工具")) - with gr.Row(): - if_label = gr.Checkbox(label=i18n("是否开启打标WebUI"),show_label=True) - path_list = gr.Textbox( - label=i18n(".list标注文件的路径"), - value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list", - interactive=True, - ) - label_info = gr.Textbox(label=i18n("打标工具进程输出信息")) - if_label.change(change_label, [if_label,path_list], [label_info]) - if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info]) - open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button]) - close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) - open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button]) - close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button]) - open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button]) - close_denoise_button.click(close_denoise, [], [denoise_info,open_denoise_button,close_denoise_button]) + def change_lang_choices(key): #根据选择的模型修改可选的语言 + # return gr.Dropdown(choices=asr_dict[key]['lang']) + return {"__type__": "update", "choices": asr_dict[key]['lang'],"value":asr_dict[key]['lang'][0]} + def change_size_choices(key): # 根据选择的模型修改可选的模型尺寸 + # return gr.Dropdown(choices=asr_dict[key]['size']) + return {"__type__": "update", "choices": asr_dict[key]['size']} + asr_model.change(change_lang_choices, [asr_model], [asr_lang]) + asr_model.change(change_size_choices, [asr_model], [asr_size]) + + with gr.Tab(i18n("0d-语音文本校对标注工具")): + if_label = gr.Checkbox(label=i18n("是否开启打标WebUI"),show_label=True) + path_list = gr.Textbox( + label=i18n(".list标注文件的路径"), + value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list", + interactive=True, + ) + label_info = gr.Textbox(label=i18n("打标工具进程输出信息")) + + with gr.Tab(i18n("0c-基于SRT的音频切分工具")): + if_srt_slicer = gr.Checkbox(label=i18n("是否开启SRT切分工具"),show_label=True) + srt_slicer_info = gr.Textbox(label=i18n("SRT切分工具进程输出信息")) + if_label.change(change_label, [if_label,path_list], [label_info]) + if_srt_slicer.change(change_srt_slicer, [if_srt_slicer], [srt_slicer_info]) + if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info]) + open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang], [asr_info,open_asr_button,close_asr_button]) + close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) + open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button]) + close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button]) + open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button]) + close_denoise_button.click(close_denoise, [], [denoise_info,open_denoise_button,close_denoise_button]) - with gr.TabItem(i18n("1-GPT-SoVITS-TTS")): + with gr.Tab(i18n("1-GPT-SoVITS-TTS")): with gr.Row(): exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True) gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False) pretrained_s2G = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value="GPT_SoVITS/pretrained_models/s2G488k.pth", interactive=True) pretrained_s2D = gr.Textbox(label=i18n("预训练的SoVITS-D模型路径"), value="GPT_SoVITS/pretrained_models/s2D488k.pth", interactive=True) pretrained_s1 = gr.Textbox(label=i18n("预训练的GPT模型路径"), value="GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt", interactive=True) - with gr.TabItem(i18n("1A-训练集格式化工具")): - gr.Markdown(value=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹")) + with gr.Tab(i18n("1A-训练集格式化工具")): + with gr.Tabs(): + with gr.Tab(i18n("输出logs/实验名目录下应有23456开头的文件和文件夹")): + with gr.Group(): + inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True) + inp_wav_dir = gr.Textbox( + label=i18n("*训练集音频文件目录"), + # value=r"D:\RVC1006\GPT-SoVITS\raw\xxx", + interactive=True, + placeholder=i18n("填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。") + ) + with gr.Tabs(): + with gr.Tab(i18n("1Aabc-训练集格式化一键三连")): + with gr.Group(): + info1abc=gr.Textbox(label=i18n("一键三连进程输出信息")) + button1abc_open = gr.Button(i18n("开启一键三连"), variant="primary",visible=True) + button1abc_close = gr.Button(i18n("终止一键三连"), variant="primary",visible=False) with gr.Row(): - inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True) - inp_wav_dir = gr.Textbox( - label=i18n("*训练集音频文件目录"), - # value=r"D:\RVC1006\GPT-SoVITS\raw\xxx", - interactive=True, - placeholder=i18n("填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。如果留空则使用.list文件里的绝对全路径。") - ) - gr.Markdown(value=i18n("1Aa-文本内容")) - with gr.Row(): - gpu_numbers1a = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True) - bert_pretrained_dir = gr.Textbox(label=i18n("预训练的中文BERT模型路径"),value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",interactive=False) - button1a_open = gr.Button(i18n("开启文本获取"), variant="primary",visible=True) - button1a_close = gr.Button(i18n("终止文本获取进程"), variant="primary",visible=False) - info1a=gr.Textbox(label=i18n("文本进程输出信息")) - gr.Markdown(value=i18n("1Ab-SSL自监督特征提取")) - with gr.Row(): - gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True) - cnhubert_base_dir = gr.Textbox(label=i18n("预训练的SSL模型路径"),value="GPT_SoVITS/pretrained_models/chinese-hubert-base",interactive=False) - button1b_open = gr.Button(i18n("开启SSL提取"), variant="primary",visible=True) - button1b_close = gr.Button(i18n("终止SSL提取进程"), variant="primary",visible=False) - info1b=gr.Textbox(label=i18n("SSL进程输出信息")) - gr.Markdown(value=i18n("1Ac-语义token提取")) - with gr.Row(): - gpu_numbers1c = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True) - button1c_open = gr.Button(i18n("开启语义token提取"), variant="primary",visible=True) - button1c_close = gr.Button(i18n("终止语义token提取进程"), variant="primary",visible=False) - info1c=gr.Textbox(label=i18n("语义token提取进程输出信息")) - gr.Markdown(value=i18n("1Aabc-训练集格式化一键三连")) - with gr.Row(): - button1abc_open = gr.Button(i18n("开启一键三连"), variant="primary",visible=True) - button1abc_close = gr.Button(i18n("终止一键三连"), variant="primary",visible=False) - info1abc=gr.Textbox(label=i18n("一键三连进程输出信息")) + with gr.Column(): + with gr.Tabs(): + with gr.Tab(i18n("1Aa-文本内容")): + gpu_numbers1a = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True) + bert_pretrained_dir = gr.Textbox(label=i18n("预训练的中文BERT模型路径"),value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",interactive=False) + with gr.Group(): + info1a=gr.Textbox(label=i18n("文本进程输出信息")) + button1a_open = gr.Button(i18n("开启文本获取"), variant="primary",visible=True) + button1a_close = gr.Button(i18n("终止文本获取进程"), variant="primary",visible=False) + with gr.Column(): + with gr.Tabs(): + with gr.Tab(i18n("1Ab-SSL自监督特征提取")): + gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True) + cnhubert_base_dir = gr.Textbox(label=i18n("预训练的SSL模型路径"),value="GPT_SoVITS/pretrained_models/chinese-hubert-base",interactive=False) + with gr.Group(): + info1b=gr.Textbox(label=i18n("SSL进程输出信息")) + button1b_open = gr.Button(i18n("开启SSL提取"), variant="primary",visible=True) + button1b_close = gr.Button(i18n("终止SSL提取进程"), variant="primary",visible=False) + with gr.Column(): + with gr.Tabs(): + with gr.Tab(i18n("1Ac-语义token提取")): + + gpu_numbers1c = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True) + with gr.Group(): + info1c=gr.Textbox(label=i18n("语义token提取进程输出信息")) + button1c_open = gr.Button(i18n("开启语义token提取"), variant="primary",visible=True) + button1c_close = gr.Button(i18n("终止语义token提取进程"), variant="primary",visible=False) + button1a_open.click(open1a, [inp_text,inp_wav_dir,exp_name,gpu_numbers1a,bert_pretrained_dir], [info1a,button1a_open,button1a_close]) button1a_close.click(close1a, [], [info1a,button1a_open,button1a_close]) button1b_open.click(open1b, [inp_text,inp_wav_dir,exp_name,gpu_numbers1Ba,cnhubert_base_dir], [info1b,button1b_open,button1b_close]) @@ -825,54 +863,72 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: button1c_close.click(close1c, [], [info1c,button1c_open,button1c_close]) button1abc_open.click(open1abc, [inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numbers1c,bert_pretrained_dir,cnhubert_base_dir,pretrained_s2G], [info1abc,button1abc_open,button1abc_close]) button1abc_close.click(close1abc, [], [info1abc,button1abc_open,button1abc_close]) - with gr.TabItem(i18n("1B-微调训练")): - gr.Markdown(value=i18n("1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。")) + with gr.Tab(i18n("1B-微调训练")): with gr.Row(): - batch_size = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True) - total_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("总训练轮数total_epoch,不建议太高"),value=8,interactive=True) - text_low_lr_rate = gr.Slider(minimum=0.2,maximum=0.6,step=0.05,label=i18n("文本模块学习率权重"),value=0.4,interactive=True) - save_every_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("保存频率save_every_epoch"),value=4,interactive=True) - if_save_latest = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True) - if_save_every_weights = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True) - gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True) - with gr.Row(): - button1Ba_open = gr.Button(i18n("开启SoVITS训练"), variant="primary",visible=True) - button1Ba_close = gr.Button(i18n("终止SoVITS训练"), variant="primary",visible=False) - info1Ba=gr.Textbox(label=i18n("SoVITS训练进程输出信息")) - gr.Markdown(value=i18n("1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。")) - with gr.Row(): - batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True) - total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True) - if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True) - if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True) - if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True) - save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True) - gpu_numbers1Bb = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True) - with gr.Row(): - button1Bb_open = gr.Button(i18n("开启GPT训练"), variant="primary",visible=True) - button1Bb_close = gr.Button(i18n("终止GPT训练"), variant="primary",visible=False) - info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息")) + with gr.Column(): + with gr.Tabs(): + with gr.Tab(i18n("1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。")): + with gr.Group(): + batch_size = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True) + total_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("总训练轮数total_epoch,不建议太高"),value=8,interactive=True) + text_low_lr_rate = gr.Slider(minimum=0.2,maximum=0.6,step=0.05,label=i18n("文本模块学习率权重"),value=0.4,interactive=True) + save_every_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("保存频率save_every_epoch"),value=4,interactive=True) + if_save_latest = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True) + if_save_every_weights = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True) + gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True) + with gr.Group(): + info1Ba=gr.Textbox(label=i18n("SoVITS训练进程输出信息")) + button1Ba_open = gr.Button(i18n("开启SoVITS训练"), variant="primary",visible=True) + button1Ba_close = gr.Button(i18n("终止SoVITS训练"), variant="primary",visible=False) + with gr.Column(): + with gr.Tabs(): + with gr.Tab(i18n("1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。")): + with gr.Group(): + batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True) + total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True) + if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True) + if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True) + if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True) + save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True) + gpu_numbers1Bb = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True) + with gr.Group(): + info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息")) + button1Bb_open = gr.Button(i18n("开启GPT训练"), variant="primary",visible=True) + button1Bb_close = gr.Button(i18n("终止GPT训练"), variant="primary",visible=False) button1Ba_open.click(open1Ba, [batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D], [info1Ba,button1Ba_open,button1Ba_close]) button1Ba_close.click(close1Ba, [], [info1Ba,button1Ba_open,button1Ba_close]) button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_dpo,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close]) button1Bb_close.click(close1Bb, [], [info1Bb,button1Bb_open,button1Bb_close]) - with gr.TabItem(i18n("1C-推理")): + with gr.Tab(i18n("1C-推理")): gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模,体验5秒Zero Shot TTS用。")) with gr.Row(): - GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name,interactive=True) - SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name,interactive=True) - gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True) - refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") - refresh_button.click(fn=change_choices,inputs=[],outputs=[SoVITS_dropdown,GPT_dropdown]) - with gr.Row(): - if_tts = gr.Checkbox(label=i18n("是否开启TTS推理WebUI"), show_label=True) - tts_info = gr.Textbox(label=i18n("TTS推理WebUI进程输出信息")) - if_tts.change(change_tts_inference, [if_tts,bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info]) - with gr.TabItem(i18n("2-GPT-SoVITS-变声")):gr.Markdown(value=i18n("施工中,请静候佳音")) - app.queue(concurrency_count=511, max_size=1022).launch( - server_name="0.0.0.0", - inbrowser=True, - share=is_share, - server_port=webui_port_main, - quiet=True, - ) + with gr.Column(): + gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True) + with gr.Group(): + GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name,interactive=True) + SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name,interactive=True) + refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary") + refresh_button.click(fn=change_choices,inputs=[],outputs=[SoVITS_dropdown,GPT_dropdown]) + with gr.Column(): + if_tts = gr.Checkbox(label=i18n("是否开启TTS推理WebUI"), show_label=True) + tts_info = gr.Textbox(label=i18n("TTS推理WebUI进程输出信息")) + if_tts.change(change_tts_inference, [if_tts,bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info]) + with gr.Tab(i18n("2-GPT-SoVITS-变声")):gr.Markdown(value=i18n("施工中,请静候佳音")) + + if gr.__version__.split(".")[0] == "4": + app.launch( + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=webui_port_main, + quiet=True, + ) + else: + app.queue(concurrency_count=511, max_size=1022).launch( + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=webui_port_main, + quiet=True, + ) + From 948fedbf1da940c0b6a1e63c0a00802a9d281e85 Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 19:57:10 +0800 Subject: [PATCH 07/10] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86=E5=AF=B9gr3?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/webui.py | 28 ++++++++++++++++++++++------ webui.py | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index 938248f8..ab3fed79 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -17,8 +17,9 @@ from srt_utils import ( port = 8991 -if len(sys.argv) > 1: +if len(sys.argv) > 2: port = int(sys.argv[1]) + is_share = eval(sys.argv[2]) from i18n.i18n import I18nAuto @@ -199,7 +200,7 @@ def preview_merged_list(first_list_folder, second_list_folder, merge_list_charac except Exception as e: gr.Warning(f"Can't Merge, Error: {e}") return "" - + from datetime import datetime @@ -295,7 +296,7 @@ with gr.Blocks() as app: merge_list_button = gr.Button(i18n("合并文件夹与List"), variant="primary") with gr.Column(scale=2): list_preview = gr.Textbox("", lines=20, max_lines=30, label=i18n("合并后的List")) - + scan_list_button.click(scan_list_folders, [scan_list_folder], [first_list_folder, second_list_folder]) merge_list_button.click(preview_merged_list, [first_list_folder, second_list_folder, merge_list_character_name, scan_list_folder], [list_preview]) save_folder.change(lambda x:gr.Textbox(value=x), [save_folder], [scan_list_folder]) @@ -317,7 +318,7 @@ with gr.Blocks() as app: [input_audio], [character], ) - + upload_audio.change( change_character_name, [upload_audio], @@ -327,7 +328,7 @@ with gr.Blocks() as app: [upload_audio], [input_audio], ) - + merge_button.click( merge_srt, [ @@ -376,4 +377,19 @@ with gr.Blocks() as app: [save_folder, character], [character_warning], ) -app.launch(inbrowser=True, server_port=port, debug=True) +if gr.__version__.split(".")[0] == "4": + app.launch( + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=port, + quiet=True, + ) +else: + app.queue(concurrency_count=511, max_size=1022).launch( + server_name="0.0.0.0", + inbrowser=True, + share=is_share, + server_port=port, + quiet=True, + ) diff --git a/webui.py b/webui.py index bf476b36..950f7e8c 100644 --- a/webui.py +++ b/webui.py @@ -166,7 +166,7 @@ def change_label(if_label,path_list): def change_srt_slicer(if_srt_slicer): global p_srt_slicer if(if_srt_slicer==True and p_srt_slicer==None): - cmd = '"%s" tools/srt_slicer/webui.py %s'%(python_exec,webui_port_srt_slicer) + cmd = '"%s" tools/srt_slicer/webui.py %s %s'%(python_exec,webui_port_srt_slicer,is_share) yield i18n("SRT切割工具WebUI已开启") print(cmd) p_srt_slicer = Popen(cmd, shell=True) From fc0ba42bf8ae504b9218ca318113744e0c15307b Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 21:16:00 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86bug:=20?= =?UTF-8?q?=E6=B2=A1=E6=9C=89auto=E8=AF=AD=E8=A8=80=E9=80=89=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/srt_utils.py | 2 +- tools/srt_slicer/webui.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/srt_slicer/srt_utils.py b/tools/srt_slicer/srt_utils.py index 1f030d23..23785d7d 100644 --- a/tools/srt_slicer/srt_utils.py +++ b/tools/srt_slicer/srt_utils.py @@ -72,7 +72,7 @@ def count_words_multilang(text): import pydub, os -def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserve_time, post_preserve_time, pre_silence_time, post_silence_time, language='auto', character='character'): +def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserve_time, post_preserve_time, pre_silence_time, post_silence_time, language='ZH', character='character'): list_file = os.path.join(save_folder, 'datamapping.list') with open(list_file, 'w', encoding="utf-8") as f: for i in range(len(subtitles)): diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index ab3fed79..631c45e3 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -270,7 +270,7 @@ with gr.Blocks() as app: pre_silence_time = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01, label=i18n("前置添加静音时间"),interactive=True,visible=False) post_silence_time = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, label=i18n("后置添加静音时间"),interactive=True,visible=False) with gr.Group(): - language = gr.Dropdown([i18n(i) for i in ["auto", "zh", "en", "ja", "all_zh", "all_ja"]], value="auto", label=i18n("语言"),interactive=True) + language = gr.Dropdown([i18n(i) for i in [ "ZH", "EN", "JA"]], value="ZH", label=i18n("语言"),interactive=True) audio_format = gr.Dropdown(["mp3", "wav", "ogg"], value="wav", label=i18n("音频格式"),interactive=True) with gr.Group(): save_folder = gr.Textbox("output/sliced_audio", label=i18n("保存文件夹"),interactive=True) From 1979211eda299d5a421702eef6c550374fe1bfaa Mon Sep 17 00:00:00 2001 From: XTer Date: Thu, 21 Mar 2024 21:18:53 +0800 Subject: [PATCH 09/10] =?UTF-8?q?=E4=BF=AE=E6=AD=A3bug=EF=BC=8Cis=5Fshare?= =?UTF-8?q?=E6=B2=A1=E6=9C=89=E9=BB=98=E8=AE=A4=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/webui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index 631c45e3..0e44e9e9 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -16,10 +16,11 @@ from srt_utils import ( ) port = 8991 - +is_share = False if len(sys.argv) > 2: port = int(sys.argv[1]) is_share = eval(sys.argv[2]) + from i18n.i18n import I18nAuto From 8a8ddad7e85d3f71b6965602d3763209b854d467 Mon Sep 17 00:00:00 2001 From: XTer Date: Fri, 22 Mar 2024 01:29:07 +0800 Subject: [PATCH 10/10] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BA=86=E5=88=87?= =?UTF-8?q?=E5=88=86=E9=9F=B3=E9=A2=91=E7=9A=84=E6=80=A7=E8=83=BD=E5=92=8C?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/srt_slicer/srt_utils.py | 7 ++++++- tools/srt_slicer/webui.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/srt_slicer/srt_utils.py b/tools/srt_slicer/srt_utils.py index 23785d7d..9c80fc54 100644 --- a/tools/srt_slicer/srt_utils.py +++ b/tools/srt_slicer/srt_utils.py @@ -74,11 +74,16 @@ import pydub, os def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserve_time, post_preserve_time, pre_silence_time, post_silence_time, language='ZH', character='character'): list_file = os.path.join(save_folder, 'datamapping.list') + try: + audio = pydub.AudioSegment.from_file(audio_path) + except Exception as e: + raise e with open(list_file, 'w', encoding="utf-8") as f: for i in range(len(subtitles)): subtitle = subtitles[i] start = subtitle.start.total_seconds() - pre_preserve_time end = subtitle.end.total_seconds() + post_preserve_time + if i < len(subtitles) - 1: next_subtitle = subtitles[i + 1] end = min(end, 1.0/2*(subtitle.end.total_seconds()+next_subtitle.start.total_seconds())) @@ -86,12 +91,12 @@ def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserv prev_subtitle = subtitles[i - 1] start = max(start, 1.0/2*(prev_subtitle.end.total_seconds()+subtitle.start.total_seconds())) try: - audio = pydub.AudioSegment.from_file(audio_path) sliced_audio = audio[int(start * 1000):int(end * 1000)] file_name = f'{character}_{i + 1:03d}.{format}' save_path = os.path.join(save_folder, file_name) sliced_audio.export(save_path, format=format) f.write(f"{file_name}|{character}|{language}|{subtitle.content}\n") + print(f"Slice {file_name} from {start} to {end}") except Exception as e: raise e diff --git a/tools/srt_slicer/webui.py b/tools/srt_slicer/webui.py index 0e44e9e9..0b4b0901 100644 --- a/tools/srt_slicer/webui.py +++ b/tools/srt_slicer/webui.py @@ -16,7 +16,7 @@ from srt_utils import ( ) port = 8991 -is_share = False +is_share = True if len(sys.argv) > 2: port = int(sys.argv[1]) is_share = eval(sys.argv[2])