From 6a2ab63e18684cc46fae272eafaed81236776044 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Mon, 26 May 2025 12:43:14 +0800
Subject: [PATCH] Add new subfix webui, fix bugs in requirements

---
 GPT_SoVITS/text/g2pw/onnx_api.py |   5 +-
 requirements.txt                 |   2 +-
 tools/subfix.py                  | 544 +++++++++++++++++++++++++++++++
 tools/subfix_webui.py            | 422 ------------------------
 tools/uvr5/webui.py              |  20 +-
 webui.py                         |  19 +-
 6 files changed, 563 insertions(+), 449 deletions(-)
 create mode 100644 tools/subfix.py
 delete mode 100644 tools/subfix_webui.py

diff --git a/GPT_SoVITS/text/g2pw/onnx_api.py b/GPT_SoVITS/text/g2pw/onnx_api.py
index a8268107..9d153745 100644
--- a/GPT_SoVITS/text/g2pw/onnx_api.py
+++ b/GPT_SoVITS/text/g2pw/onnx_api.py
@@ -23,8 +23,9 @@ from .utils import load_config
 onnxruntime.set_default_logger_severity(3)
 try:
     onnxruntime.preload_dlls()
-except:pass
-    #traceback.print_exc()
+except:
+    pass
+    # traceback.print_exc()
 
 warnings.filterwarnings("ignore")
 model_version = "1.1"
diff --git a/requirements.txt b/requirements.txt
index 90e4957d..4ad45b2f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,7 @@ pytorch-lightning>=2.4
 gradio<5
 ffmpeg-python
 onnxruntime; platform_machine == "aarch64" or platform_machine == "arm64"
-onnxruntime-gpu; platform_machine == "x86_64" or platform_machine == "AMD64"
+onnxruntime-gpu; platform_machine == "x86_64" or platform_machine == "amd64"
 tqdm
 funasr==1.0.27
 cn2an
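
Note on the requirements change: pip selects between the two onnxruntime
lines via PEP 508 environment markers, and platform_machine is compared as
a literal string against the value of platform.machine() on the installing
host. A quick way to see which branch a given machine takes (illustrative
one-liner):

    python -c "import platform; print(platform.machine())"   # e.g. x86_64, arm64
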
diff --git a/tools/subfix.py b/tools/subfix.py
new file mode 100644
index 00000000..fb4cfdd6
--- /dev/null
+++ b/tools/subfix.py
@@ -0,0 +1,544 @@
+import datetime
+import os
+import threading
+import traceback
+from dataclasses import dataclass
+from functools import partial
+from typing import List
+
+import click
+import gradio as gr
+import librosa
+import numpy as np
+import soundfile
+from gradio.components.audio import WaveformOptions
+
+from tools.i18n.i18n import I18nAuto
+
+PARTIAL_EXIT = partial(os._exit, 0)
+
+LANGUAGE_MAP: dict = {
+    "ZH": "ZH",
+    "zh": "ZH",
+    "JP": "JA",
+    "jp": "JA",
+    "JA": "JA",
+    "ja": "JA",
+    "EN": "EN",
+    "en": "EN",
+    "KO": "KO",
+    "ko": "KO",
+    "yue": "YUE",
+    "YUE": "YUE",
+}
+
+LOCK = threading.Lock()
+
+IS_CLI = True
+
+
+@dataclass
+class SubfixErr:
+    error: Exception
+    tracebacks: str
+
+
+class Subfix:
+    batch_size: int = 2
+    cur_idx: int = 0
+    list_path: str
+    textboxes: List[gr.Textbox] = []
+    audios: List[gr.Audio] = []
+    languages: List[gr.Dropdown] = []
+    selections: List[gr.Checkbox] = []
+    transcriptions_list: List[List[str]] = []
+
+    merge_audio_button: gr.Button
+    delete_audio_button: gr.Button
+    previous_index_button1: gr.Button
+    next_index_button1: gr.Button
+    previous_index_button2: gr.Button
+    next_index_button2: gr.Button
+    index_slider: gr.Slider
+    batch_size_slider: gr.Slider
+    close_button: gr.Button
+
+    def __init__(self, i18n: I18nAuto):
+        self.i18n = i18n
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=2, min_width=160):
+                self.index_slider = gr.Slider(minimum=0, maximum=1, step=1, label=i18n("音频索引"))
+            with gr.Column(scale=1, min_width=160):
+                self.previous_index_button1 = gr.Button(value=i18n("上一页"), elem_id="btn_previous")
+            with gr.Column(scale=1, min_width=160):
+                self.next_index_button1 = gr.Button(value=i18n("下一页"), elem_id="btn_next")
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=2, min_width=160):
+                self.batch_size_slider = gr.Slider(
+                    minimum=4, maximum=20, step=2, value=self.batch_size, label=i18n("每页音频条数")
+                )
+            with gr.Column(scale=1, min_width=160):
+                self.merge_audio_button = gr.Button(value=i18n("合并选中音频"))
+            with gr.Column(scale=1, min_width=160):
+                self.delete_audio_button = gr.Button(value=i18n("删除选中音频"))
+        gr.render(
+            inputs=[self.index_slider, self.batch_size_slider],
+            triggers=[self.batch_size_slider.change],
+        )(self._render_text_area)
+
+    @property
+    def max_index(self):
+        return len(self.transcriptions_list)
+
+    def load_list(self, list_path: str):
+        with open(list_path, mode="r", encoding="utf-8") as f:
+            list_data = f.readlines()
+        for idx, transcriptions in enumerate(list_data):
+            data = transcriptions.split("|")
+            if len(data) != 4:
+                print(f"Error Line {idx + 1}: {'|'.join(data)}")
+                continue
+            audio_name, audio_folder, text_language, text = data
+            self.transcriptions_list.append(
+                [
+                    audio_name,
+                    audio_folder,
+                    LANGUAGE_MAP.get(text_language.upper(), text_language.upper()),
+                    text.strip("\n").strip(),
+                ]
+            )
+        self.list_path = list_path
+
+    def save_list(self):
+        data = []
+        for transcriptions in self.transcriptions_list:
+            data.append("|".join(transcriptions))
+        try:
+            with open(self.list_path, mode="w", encoding="utf-8") as f:
+                f.write("\n".join(data))
+        except Exception as e:
+            return SubfixErr(e, traceback.format_exc())
+
+    def change_index(self, index: int):
+        audios = []
+        texts = []
+        languages = []
+        checkboxes = []
+        with LOCK:
+            for i in range(index, index + self.batch_size):
+                if i <= self.max_index - 1:
+                    audios.append(gr.Audio(value=self.transcriptions_list[i][0]))
+                    texts.append(gr.Textbox(value=self.transcriptions_list[i][3], label=self.i18n("Text") + f" {i}"))
+                    languages.append(gr.Dropdown(value=self.transcriptions_list[i][2]))
+                else:
+                    audios.append(gr.Audio(value=None, interactive=False))
+                    texts.append(gr.Textbox(value=None, label=self.i18n("Text") + f" {i}", interactive=False))
+                    languages.append(gr.Dropdown(value=None, interactive=False))
+            checkboxes = [gr.Checkbox(False) for i in range(self.batch_size)]
+            self.cur_idx = index
+        return *audios, *texts, *languages, *checkboxes
+
+    def next_page(self, index: int):
+        batch_size = self.batch_size
+        max_index = self.max_index - batch_size  # last valid page start
+        if max_index <= 0:
+            max_index = 0
+        index = min(index + batch_size, max_index)
+        return gr.Slider(value=index), *self.change_index(index)
+
+    def previous_page(self, index: int):
+        batch_size = self.batch_size
+        index = max(index - batch_size, 0)
+        return gr.Slider(value=index), *self.change_index(index)
+
+    def delete_audio(self, index, *selected):
+        delete_index = [i + index for i, _ in enumerate(selected) if _]
+        delete_index = [i for i in delete_index if i < self.max_index]
+        for idx in delete_index[::-1]:
+            self.transcriptions_list.pop(idx)
+        self.save_list()
+        return gr.Slider(value=index, maximum=self.max_index), *self.change_index(index)
+
+    def submit(self, *input):
+        with LOCK:
+            index = self.cur_idx
+            batch_size = self.batch_size
+            texts = input[: len(input) // 2]
+            languages = input[len(input) // 2 :]
+            if texts is None or languages is None:
+                raise ValueError()
+            for idx in range(index, min(index + batch_size, self.max_index)):
+                self.transcriptions_list[idx][3] = texts[idx - index].strip().strip("\n")
+                self.transcriptions_list[idx][2] = languages[idx - index]
+            result = self.save_list()
+            if isinstance(result, SubfixErr):
+                gr.Warning(str(result.error))
+                print(result.tracebacks)
+
+    def merge_audio(self, index, *selected):
+        batch_size = self.batch_size
+        merge_index = [i + index for i, _ in enumerate(selected) if _]
+        merge_index = [i for i in merge_index if i < self.max_index]
+        if len(merge_index) < 2:
+            return *(gr.skip() for _ in range(batch_size * 3 + 1)), *(gr.Checkbox(False) for _ in range(batch_size))
+        else:
+            merge_texts = []
+            merge_audios = []
+            first_itm_index = merge_index[0]
+            first_itm_path = f"{os.path.splitext(self.transcriptions_list[first_itm_index][0])[0]}_{str(datetime.datetime.now().strftime(r'%Y%m%d_%H%M%S'))}.wav"
+            final_audio_list = []
+            for idx in merge_index:
+                merge_texts.append(self.transcriptions_list[idx][3])
+                merge_audios.append(self.transcriptions_list[idx][0])
+            for idx in merge_index[:0:-1]:
+                self.transcriptions_list.pop(idx)
+            for audio_path in merge_audios:
+                final_audio_list.append(librosa.load(audio_path, sr=32000, mono=True)[0])
+                final_audio_list.append(np.zeros(int(32000 * 0.3)))
+            final_audio_list.pop()
+            final_audio = np.concatenate(final_audio_list)
+            soundfile.write(first_itm_path, final_audio, 32000)
+            self.transcriptions_list[first_itm_index][0] = first_itm_path
+            self.transcriptions_list[first_itm_index][3] = ",".join(merge_texts)
+            return gr.Slider(maximum=self.max_index), *self.change_index(index)
+
+    def _render_text_area(self, index, batch_size):
+        i18n = self.i18n
+        self.textboxes = []
+        self.audios = []
+        self.languages = []
+        self.selections = []
+        self.batch_size = batch_size
+        for i in range(index, index + batch_size):
+            with gr.Row(equal_height=True):
+                if i <= self.max_index - 1:
+                    with gr.Column(scale=2, min_width=160):
+                        textbox_tmp = gr.Textbox(
+                            value=self.transcriptions_list[i][3],
+                            label=i18n("Text") + f" {i}",
+                            lines=2,
+                            max_lines=3,
+                            interactive=True,
+                        )
+                    with gr.Column(scale=1, min_width=160):
+                        audio_tmp = gr.Audio(
+                            value=self.transcriptions_list[i][0],
+                            show_label=False,
+                            show_download_button=False,
+                            editable=False,
+                            waveform_options={"show_recording_waveform": False, "show_controls": False},
+                        )
+                    with gr.Column(scale=1, min_width=160):
+                        with gr.Group():
+                            with gr.Row():
+                                language_tmp = gr.Dropdown(
+                                    choices=["ZH", "EN", "JA", "KO", "YUE"],
+                                    value=self.transcriptions_list[i][2],
+                                    allow_custom_value=True,
+                                    label=i18n("文本语言"),
+                                    interactive=True,
+                                )
+                            with gr.Row():
+                                selection_tmp = gr.Checkbox(
+                                    label=i18n("选择音频"),
+                                )
+                else:
+                    with gr.Column(scale=2, min_width=160):
+                        textbox_tmp = gr.Textbox(
+                            label=i18n("Text") + f" {i}",
+                            lines=2,
+                            max_lines=3,
+                            elem_id="subfix_textbox",
+                            interactive=False,
+                        )
+                    with gr.Column(scale=1, min_width=160):
+                        audio_tmp = gr.Audio(
+                            streaming=True,
+                            show_label=False,
+                            show_download_button=False,
+                            interactive=False,
+                            waveform_options=WaveformOptions(show_recording_waveform=False, show_controls=False),
+                        )
+                    with gr.Column(scale=1, min_width=160):
+                        with gr.Group():
+                            with gr.Row():
+                                language_tmp = gr.Dropdown(
+                                    choices=["ZH", "EN", "JA", "KO", "YUE"],
+                                    value=None,
+                                    allow_custom_value=True,
+                                    label=i18n("文本语言"),
+                                    interactive=False,
+                                )
+                            with gr.Row():
+                                selection_tmp = gr.Checkbox(
+                                    label=i18n("选择音频"),
+                                    interactive=False,
+                                )
+
+            self.textboxes.append(textbox_tmp)
+            self.audios.append(audio_tmp)
+            self.languages.append(language_tmp)
+            self.selections.append(selection_tmp)
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=2, min_width=160):
+                self.close_button = gr.Button(value=i18n("关闭打标WebUI"), variant="stop")
+            with gr.Column(scale=1, min_width=160):
+                self.previous_index_button2 = gr.Button(value=i18n("上一页"))
+            with gr.Column(scale=1, min_width=160):
+                self.next_index_button2 = gr.Button(value=i18n("下一页"))
+
+        # Event Trigger Binding
+
+        self.index_slider.release(  # Change Index Slider
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+        ).success(
+            fn=self.change_index,
+            inputs=[
+                self.index_slider,
+            ],
+            outputs=[
+                *self.audios,
+                *self.textboxes,
+                *self.languages,
+                *self.selections,
+            ],
+            max_batch_size=1,
+            trigger_mode="once",
+        )
+
+        self.next_index_button1.click(  # Next Page Button on the Top
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+        ).success(
+            fn=self.next_page,
+            inputs=[
+                self.index_slider,
+            ],
+            outputs=[
+                self.index_slider,
+                *self.audios,
+                *self.textboxes,
+                *self.languages,
+                *self.selections,
+            ],
+            scroll_to_output=True,
+            trigger_mode="once",
+        )
+
+        self.next_index_button2.click(  # Next Page Button on the Bottom, Binding to Next Page Button on the Top
+            lambda: None,
+            [],
+            [],
+            js="""
+            () => {
+                document.getElementById("btn_next").click();
+            }""",
+            trigger_mode="once",
+        )
+
+        self.previous_index_button1.click(  # Previous Page Button on the Top
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+        ).success(
+            fn=self.previous_page,
+            inputs=[
+                self.index_slider,
+            ],
+            outputs=[
+                self.index_slider,
+                *self.audios,
+                *self.textboxes,
+                *self.languages,
+                *self.selections,
+            ],
+            scroll_to_output=True,
+            trigger_mode="once",
+        )
+
+        self.previous_index_button2.click(  # Previous Page Button on the Bottom, Binding to Previous Page Button on the Top
+            lambda: None,
+            [],
+            [],
+            js="""
+            () => {
+                document.getElementById("btn_previous").click();
+            }""",
+            trigger_mode="once",
+        )
+
+        self.delete_audio_button.click(  # Delete the Selected Audio from the Transcription File
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+        ).success(
+            fn=self.delete_audio,
+            inputs=[
+                self.index_slider,
+                *self.selections,
+            ],
+            outputs=[
+                self.index_slider,
+                *self.audios,
+                *self.textboxes,
+                *self.languages,
+                *self.selections,
+            ],
+            scroll_to_output=True,
+        ).success(
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+            show_progress="hidden",
+        )
+
+        self.merge_audio_button.click(  # Merge the Selected Audio in the Transcription File
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+        ).success(
+            fn=self.merge_audio,
+            inputs=[
+                self.index_slider,
+                *self.selections,
+            ],
+            outputs=[
+                self.index_slider,
+                *self.audios,
+                *self.textboxes,
+                *self.languages,
+                *self.selections,
+            ],
+            scroll_to_output=True,
+        ).success(
+            fn=self.submit,
+            inputs=[
+                *self.textboxes,
+                *self.languages,
+            ],
+            outputs=[],
+            show_progress="hidden",
+        )
+        if not IS_CLI:
+            self.close_button.click(  # Close the Subfix Tab, Binding to Close Button on Audio Processing Tab
+                fn=lambda: None,
+                inputs=[],
+                outputs=[],
+                js="""
+                () => {
+                    document.getElementById("btn_close").click();
+                }""",
+                trigger_mode="once",
+            )
+        else:
+            self.close_button.click(  # In CLI Mode, Save the List and Exit the Process
+                fn=self.submit,
+                inputs=[
+                    *self.textboxes,
+                    *self.languages,
+                ],
+                outputs=[],
+                trigger_mode="once",
+            ).then(
+                fn=PARTIAL_EXIT,
+                inputs=[],
+                outputs=[],
+            )
+
+    def render(self, list_path: str, batch_size: int = 10):
+        self.batch_size = batch_size
+        self.transcriptions_list = []
+        self.load_list(list_path=list_path)
+
+
+@click.command(name="subfix")
+@click.argument(
+    "list-path",
+    metavar="",
+    type=click.Path(exists=True, dir_okay=False, readable=True, writable=True),
+    required=True,
+)
+@click.option(
+    "--i18n-lang",
+    type=str,
+    default="Auto",
+    help="Language for internationalisation",
+    show_default=True,
+)
+@click.option(
+    "--port",
+    type=int,
+    default=9871,
+    show_default=True,
+)
+@click.option(
+    "--share",
+    type=bool,
+    default=False,
+    show_default=True,
+)
+def main(list_path: str = "", i18n_lang="Auto", port=9871, share=False):
+    """Web-based audio subtitle editing and multilingual annotation tool.
+
+    Accepts a transcription list path and launches a Gradio WebUI for text editing.
+    """
+
+    with gr.Blocks(analytics_enabled=False) as app:
+        subfix = Subfix(I18nAuto(i18n_lang))
+        subfix.render(list_path=list_path)
+        if subfix.max_index > 0:
+            timer = gr.Timer(0.1)
+
+            timer.tick(
+                fn=lambda: (
+                    gr.Slider(value=0, maximum=subfix.max_index),
+                    gr.Slider(value=10),
+                    gr.Timer(active=False),
+                ),
+                inputs=[],
+                outputs=[
+                    subfix.index_slider,
+                    subfix.batch_size_slider,
+                    timer,
+                ],
+            )
+        else:
+            timer = gr.Timer(2)
+
+            timer.tick(
+                fn=lambda: (_ for _ in ()).throw(gr.Error("Invalid List")),
+                inputs=[],
+                outputs=[],
+            )
+    app.queue().launch(
+        server_name="0.0.0.0",
+        inbrowser=True,
+        share=share,
+        server_port=port,
+        quiet=False,
+    )
+
+
+if __name__ == "__main__":
+    main()
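
Note on the input format: load_list() above expects one clip per line with
exactly four pipe-separated fields, audio_path|folder|language|text, and
skips anything else; language codes are normalised through LANGUAGE_MAP
(e.g. "jp" -> "JA"). A minimal sketch of a valid list (file contents and
paths are hypothetical):

    output/slicer_opt/vocal_0001.wav|slicer_opt|ZH|今天天气不错。
    output/slicer_opt/vocal_0002.wav|slicer_opt|EN|The weather is nice today.
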
diff --git a/tools/subfix_webui.py b/tools/subfix_webui.py
deleted file mode 100644
index 3f2fd03e..00000000
--- a/tools/subfix_webui.py
+++ /dev/null
@@ -1,422 +0,0 @@
-import sys
-from tools.i18n.i18n import I18nAuto, scan_language_list
-language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
-i18n = I18nAuto(language=language)
-import argparse
-import copy
-import json
-import os
-import uuid
-
-try:
-    import gradio.analytics as analytics
-
-    analytics.version_check = lambda: None
-except:
-    ...
-
-import gradio as gr
-import librosa
-import numpy as np
-import soundfile
-
-g_json_key_text = ""
-g_json_key_path = ""
-g_load_file = ""
-g_load_format = ""
-
-g_max_json_index = 0
-g_index = 0
-g_batch = 10
-g_text_list = []
-g_audio_list = []
-g_checkbox_list = []
-g_data_json = []
-
-
-def reload_data(index, batch):
-    global g_index
-    g_index = index
-    global g_batch
-    g_batch = batch
-    datas = g_data_json[index : index + batch]
-    output = []
-    for d in datas:
-        output.append({g_json_key_text: d[g_json_key_text], g_json_key_path: d[g_json_key_path]})
-    return output
-
-
-def b_change_index(index, batch):
-    global g_index, g_batch
-    g_index, g_batch = index, batch
-    datas = reload_data(index, batch)
-    output = []
-    for i, _ in enumerate(datas):
-        output.append(
-            # gr.Textbox(
-            #     label=f"Text {i+index}",
-            #     value=_[g_json_key_text]#text
-            # )
-            {"__type__": "update", "label": f"Text {i + index}", "value": _[g_json_key_text]}
-        )
-    for _ in range(g_batch - len(datas)):
-        output.append(
-            # gr.Textbox(
-            #     label=f"Text",
-            #     value=""
-            # )
-            {"__type__": "update", "label": "Text", "value": ""}
-        )
-    for _ in datas:
-        output.append(_[g_json_key_path])
-    for _ in range(g_batch - len(datas)):
-        output.append(None)
-    for _ in range(g_batch):
-        output.append(False)
-    return output
-
-
-def b_next_index(index, batch):
-    b_save_file()
-    if (index + batch) <= g_max_json_index:
-        return index + batch, *b_change_index(index + batch, batch)
-    else:
-        return index, *b_change_index(index, batch)
-
-
-def b_previous_index(index, batch):
-    b_save_file()
-    if (index - batch) >= 0:
-        return index - batch, *b_change_index(index - batch, batch)
-    else:
-        return 0, *b_change_index(0, batch)
-
-
-def b_submit_change(*text_list):
-    global g_data_json
-    change = False
-    for i, new_text in enumerate(text_list):
-        if g_index + i <= g_max_json_index:
-            new_text = new_text.strip() + " "
-            if g_data_json[g_index + i][g_json_key_text] != new_text:
-                g_data_json[g_index + i][g_json_key_text] = new_text
-                change = True
-    if change:
-        b_save_file()
-    return g_index, *b_change_index(g_index, g_batch)
-
-
-def b_delete_audio(*checkbox_list):
-    global g_data_json, g_index, g_max_json_index
-    b_save_file()
-    change = False
-    for i, checkbox in reversed(list(enumerate(checkbox_list))):
-        if g_index + i < len(g_data_json):
-            if checkbox == True:
-                g_data_json.pop(g_index + i)
-                change = True
-
-    g_max_json_index = len(g_data_json) - 1
-    if g_index > g_max_json_index:
-        g_index = g_max_json_index
-    g_index = g_index if g_index >= 0 else 0
-    if change:
-        b_save_file()
-    # return gr.Slider(value=g_index, maximum=(g_max_json_index if g_max_json_index>=0 else 0)), *b_change_index(g_index, g_batch)
-    return {
-        "value": g_index,
-        "__type__": "update",
-        "maximum": (g_max_json_index if g_max_json_index >= 0 else 0),
-    }, *b_change_index(g_index, g_batch)
-
-
-def b_invert_selection(*checkbox_list):
-    new_list = [not item if item is True else True for item in checkbox_list]
-    return new_list
-
-
-def get_next_path(filename):
-    base_dir = os.path.dirname(filename)
-    base_name = os.path.splitext(os.path.basename(filename))[0]
-    for i in range(100):
-        new_path = os.path.join(base_dir, f"{base_name}_{str(i).zfill(2)}.wav")
-        if not os.path.exists(new_path):
-            return new_path
-    return os.path.join(base_dir, f"{str(uuid.uuid4())}.wav")
-
-
-def b_audio_split(audio_breakpoint, *checkbox_list):
-    global g_data_json, g_max_json_index
-    checked_index = []
-    for i, checkbox in enumerate(checkbox_list):
-        if checkbox == True and g_index + i < len(g_data_json):
-            checked_index.append(g_index + i)
-    if len(checked_index) == 1:
-        index = checked_index[0]
-        audio_json = copy.deepcopy(g_data_json[index])
-        path = audio_json[g_json_key_path]
-        data, sample_rate = librosa.load(path, sr=None, mono=True)
-        audio_maxframe = len(data)
-        break_frame = int(audio_breakpoint * sample_rate)
-
-        if break_frame >= 1 and break_frame < audio_maxframe:
-            audio_first = data[0:break_frame]
-            audio_second = data[break_frame:]
-            nextpath = get_next_path(path)
-            soundfile.write(nextpath, audio_second, sample_rate)
-            soundfile.write(path, audio_first, sample_rate)
-            g_data_json.insert(index + 1, audio_json)
-            g_data_json[index + 1][g_json_key_path] = nextpath
-            b_save_file()
-
-    g_max_json_index = len(g_data_json) - 1
-    # return gr.Slider(value=g_index, maximum=g_max_json_index), *b_change_index(g_index, g_batch)
-    return {"value": g_index, "maximum": g_max_json_index, "__type__": "update"}, *b_change_index(g_index, g_batch)
-
-
-def b_merge_audio(interval_r, *checkbox_list):
-    global g_data_json, g_max_json_index
-    b_save_file()
-    checked_index = []
-    audios_path = []
-    audios_text = []
-    for i, checkbox in enumerate(checkbox_list):
-        if checkbox == True and g_index + i < len(g_data_json):
-            checked_index.append(g_index + i)
-
-    if len(checked_index) > 1:
-        for i in checked_index:
-            audios_path.append(g_data_json[i][g_json_key_path])
-            audios_text.append(g_data_json[i][g_json_key_text])
-        for i in reversed(checked_index[1:]):
-            g_data_json.pop(i)
-
-        base_index = checked_index[0]
-        base_path = audios_path[0]
-        g_data_json[base_index][g_json_key_text] = "".join(audios_text)
-
-        audio_list = []
-        l_sample_rate = None
-        for i, path in enumerate(audios_path):
-            data, sample_rate = librosa.load(path, sr=l_sample_rate, mono=True)
-            l_sample_rate = sample_rate
-            if i > 0:
-                silence = np.zeros(int(l_sample_rate * interval_r))
-                audio_list.append(silence)
-
-            audio_list.append(data)
-
-        audio_concat = np.concatenate(audio_list)
-
-        soundfile.write(base_path, audio_concat, l_sample_rate)
-
-    b_save_file()
-
-    g_max_json_index = len(g_data_json) - 1
-
-    # return gr.Slider(value=g_index, maximum=g_max_json_index), *b_change_index(g_index, g_batch)
-    return {"value": g_index, "maximum": g_max_json_index, "__type__": "update"}, *b_change_index(g_index, g_batch)
-
-
-def b_save_json():
-    with open(g_load_file, "w", encoding="utf-8") as file:
-        for data in g_data_json:
-            file.write(f"{json.dumps(data, ensure_ascii=False)}\n")
-
-
-def b_save_list():
-    with open(g_load_file, "w", encoding="utf-8") as file:
-        for data in g_data_json:
-            wav_path = data["wav_path"]
-            speaker_name = data["speaker_name"]
-            language = data["language"]
-            text = data["text"]
-            file.write(f"{wav_path}|{speaker_name}|{language}|{text}".strip() + "\n")
-
-
-def b_load_json():
-    global g_data_json, g_max_json_index
-    with open(g_load_file, "r", encoding="utf-8") as file:
-        g_data_json = file.readlines()
-        g_data_json = [json.loads(line) for line in g_data_json]
-    g_max_json_index = len(g_data_json) - 1
-
-
-def b_load_list():
-    global g_data_json, g_max_json_index
-    with open(g_load_file, "r", encoding="utf-8") as source:
-        data_list = source.readlines()
-        for _ in data_list:
-            data = _.split("|")
-            if len(data) == 4:
-                wav_path, speaker_name, language, text = data
-                g_data_json.append(
-                    {"wav_path": wav_path, "speaker_name": speaker_name, "language": language, "text": text.strip()}
-                )
-            else:
-                print("error line:", data)
-    g_max_json_index = len(g_data_json) - 1
-
-
-def b_save_file():
-    if g_load_format == "json":
-        b_save_json()
-    elif g_load_format == "list":
-        b_save_list()
-
-
-def b_load_file():
-    if g_load_format == "json":
-        b_load_json()
-    elif g_load_format == "list":
-        b_load_list()
-
-
-def set_global(load_json, load_list, json_key_text, json_key_path, batch):
-    global g_json_key_text, g_json_key_path, g_load_file, g_load_format, g_batch
-
-    g_batch = int(batch)
-
-    if load_json != "None":
-        g_load_format = "json"
-        g_load_file = load_json
-    elif load_list != "None":
-        g_load_format = "list"
-        g_load_file = load_list
-    else:
-        g_load_format = "list"
-        g_load_file = "demo.list"
-
-    g_json_key_text = json_key_text
-    g_json_key_path = json_key_path
-
-    b_load_file()
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Process some integers.")
-    parser.add_argument("--load_json", default="None", help="source file, like demo.json")
-    parser.add_argument("--is_share", default="False", help="whether webui is_share=True")
-    parser.add_argument("--load_list", default="None", help="source file, like demo.list")
-    parser.add_argument("--webui_port_subfix", default=9871, help="source file, like demo.list")
-    parser.add_argument("--json_key_text", default="text", help="the text key name in json, Default: text")
-    parser.add_argument("--json_key_path", default="wav_path", help="the path key name in json, Default: wav_path")
-    parser.add_argument("--g_batch", default=10, help="max number g_batch wav to display, Default: 10")
-
-    args = parser.parse_args()
-
-    set_global(args.load_json, args.load_list, args.json_key_text, args.json_key_path, args.g_batch)
-
-    with gr.Blocks(analytics_enabled=False) as demo:
-        gr.Markdown(
-            value=i18n("Submit Text: 将当前页所有文本框内容手工保存到内存和文件(翻页前后或者退出标注页面前如果没点这个按钮,你再翻回来就回滚了,白忙活。)")
-        )
-        with gr.Row():
-            btn_change_index = gr.Button("Change Index")
-            btn_submit_change = gr.Button("Submit Text")
-            btn_merge_audio = gr.Button("Merge Audio")
-            btn_delete_audio = gr.Button("Delete Audio")
-            btn_previous_index = gr.Button("Previous Index")
-            btn_next_index = gr.Button("Next Index")
-
-        with gr.Row():
-            index_slider = gr.Slider(minimum=0, maximum=g_max_json_index, value=g_index, step=1, label="Index", scale=3)
-            splitpoint_slider = gr.Slider(
-                minimum=0, maximum=120.0, value=0, step=0.1, label="Audio Split Point(s)", scale=3
-            )
-            btn_audio_split = gr.Button("Split Audio", scale=1)
-            btn_save_json = gr.Button("Save File", visible=True, scale=1)
-            btn_invert_selection = gr.Button("Invert Selection", scale=1)
-
-        with gr.Row():
-            with gr.Column():
-                for _ in range(0, g_batch):
-                    with gr.Row():
-                        text = gr.Textbox(label="Text", visible=True, scale=5)
-                        audio_output = gr.Audio(label="Output Audio", visible=True, scale=5)
-                        audio_check = gr.Checkbox(label="Yes", show_label=True, info="Choose Audio", scale=1)
-                        g_text_list.append(text)
-                        g_audio_list.append(audio_output)
-                        g_checkbox_list.append(audio_check)
-
-        with gr.Row():
-            batchsize_slider = gr.Slider(
-                minimum=1, maximum=g_batch, value=g_batch, step=1, label="Batch Size", scale=3, interactive=False
-            )
-            interval_slider = gr.Slider(minimum=0, maximum=2, value=0, step=0.01, label="Interval", scale=3)
-            btn_theme_dark = gr.Button("Light Theme", link="?__theme=light", scale=1)
-            btn_theme_light = gr.Button("Dark Theme", link="?__theme=dark", scale=1)
-
-        btn_change_index.click(
-            b_change_index,
-            inputs=[
-                index_slider,
-                batchsize_slider,
-            ],
-            outputs=[*g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_submit_change.click(
-            b_submit_change,
-            inputs=[
-                *g_text_list,
-            ],
-            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_previous_index.click(
-            b_previous_index,
-            inputs=[
-                index_slider,
-                batchsize_slider,
-            ],
-            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_next_index.click(
-            b_next_index,
-            inputs=[
-                index_slider,
-                batchsize_slider,
-            ],
-            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_delete_audio.click(
-            b_delete_audio,
-            inputs=[*g_checkbox_list],
-            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_merge_audio.click(
-            b_merge_audio,
-            inputs=[interval_slider, *g_checkbox_list],
-            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_audio_split.click(
-            b_audio_split,
-            inputs=[splitpoint_slider, *g_checkbox_list],
-            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-        btn_invert_selection.click(b_invert_selection, inputs=[*g_checkbox_list], outputs=[*g_checkbox_list])
-
-        btn_save_json.click(b_save_file)
-
-        demo.load(
-            b_change_index,
-            inputs=[
-                index_slider,
-                batchsize_slider,
-            ],
-            outputs=[*g_text_list, *g_audio_list, *g_checkbox_list],
-        )
-
-    demo.launch(
-        server_name="0.0.0.0",
-        inbrowser=True,
-        # quiet=True,
-        share=eval(args.is_share),
-        server_port=int(args.webui_port_subfix),
-    )
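
Note for scripts that invoked the deleted tool directly: its argparse flags
map onto the new click interface roughly as follows (a sketch; the old JSON
mode has no equivalent, since tools/subfix.py only reads .list files):

    --load_list FILE          ->  positional LIST-PATH argument
    --webui_port_subfix PORT  ->  --port PORT
    --is_share BOOL           ->  --share BOOL
    --g_batch N               ->  set per page via the WebUI slider instead
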
diff --git a/tools/uvr5/webui.py b/tools/uvr5/webui.py
index f5f8d3f6..0112a1aa 100644
--- a/tools/uvr5/webui.py
+++ b/tools/uvr5/webui.py
@@ -1,23 +1,22 @@
 import logging
 import os
+import sys
 import traceback
 
-import gradio as gr
-
-from tools.i18n.i18n import I18nAuto
-from tools.my_utils import clean_path
-
-i18n = I18nAuto()
-
-logger = logging.getLogger(__name__)
-import sys
-
 import ffmpeg
+import gradio as gr
 import torch
 from bsroformer import Roformer_Loader
 from mdxnet import MDXNetDereverb
 from vr import AudioPre, AudioPreDeEcho
 
+from tools.i18n.i18n import I18nAuto
+from tools.my_utils import clean_path, load_cudnn
+
+i18n = I18nAuto()
+
+logger = logging.getLogger(__name__)
+
 weight_uvr5_root = "tools/uvr5/uvr5_weights"
 uvr5_names = []
 for name in os.listdir(weight_uvr5_root):
@@ -44,6 +43,7 @@ def html_center(text, label="p"):
 
 def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
     infos = []
+    load_cudnn()
     try:
         inp_root = clean_path(inp_root)
         save_root_vocal = clean_path(save_root_vocal)
diff --git a/webui.py b/webui.py
index 0e34987a..dd9a8eb8 100644
--- a/webui.py
+++ b/webui.py
@@ -58,6 +58,7 @@ for site_packages_root in site_packages_roots:
         traceback.print_exc()
 import shutil
 import subprocess
+from multiprocessing import cpu_count
 from subprocess import Popen
 
 from tools.assets import css, js, top_html
@@ -86,14 +87,9 @@ from config import (
 from tools import my_utils
 from tools.my_utils import check_details, check_for_existance
 
-# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # Fall back to CPU for steps MPS does not support
-try:
-    import gradio.analytics as analytics
-
-    analytics.version_check = lambda: None
-except:
-    ...
-import gradio as gr
+language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
+os.environ["language"] = language
+i18n = I18nAuto(language=language)
 
 n_cpu = cpu_count()
 
@@ -276,12 +272,7 @@ def change_label(path_list):
     if p_label is None:
         check_for_existance([path_list])
         path_list = my_utils.clean_path(path_list)
-        cmd = '"%s" -s tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % (
-            python_exec,
-            path_list,
-            webui_port_subfix,
-            is_share,
-        )
+        cmd = f'"{python_exec}" -s tools/subfix.py --i18n-lang {language} --port {webui_port_subfix} --share {is_share} "{path_list}"'
         yield (
             process_info(process_name_subfix, "opened"),
             {"__type__": "update", "visible": False},
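
Note: with the values webui.py resolves at runtime, the f-string above
expands to a command along these lines (concrete values are hypothetical):

    "python" -s tools/subfix.py --i18n-lang zh_CN --port 9871 --share False "output/asr_opt/slicer_opt.list"

The 9871 default of the new --port option matches the old webui_port_subfix
default, so the label editor lands on the same port whether it is spawned
from webui.py or launched by hand.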