Add new subfix webui, fix bugs in requirements

2025-12-17 01:59:08 +08:00 · 2025-05-26 12:43:14 +08:00 · 2025-05-26 12:43:14 +08:00 · 6a2ab63e18
commit 6a2ab63e18
parent ed89a02337
6 changed files with 563 additions and 449 deletions
--- a/GPT_SoVITS/text/g2pw/onnx_api.py
+++ b/GPT_SoVITS/text/g2pw/onnx_api.py
@ -23,8 +23,9 @@ from .utils import load_config
 onnxruntime.set_default_logger_severity(3)
 try:
    onnxruntime.preload_dlls()
-except:pass
+except:
-    #traceback.print_exc()
+    pass
    # traceback.print_exc()
 warnings.filterwarnings("ignore")
 model_version = "1.1"
--- a/requirements.txt
+++ b/requirements.txt
@ -8,7 +8,7 @@ pytorch-lightning>=2.4
 gradio<5
 ffmpeg-python
 onnxruntime; platform_machine == "aarch64" or platform_machine == "arm64"
-onnxruntime-gpu; platform_machine == "x86_64" or platform_machine == "AMD64"
+onnxruntime-gpu; platform_machine == "x86_64" or platform_machine == "AMD64" or platform_machine == "amd64"
 tqdm
 funasr==1.0.27
 cn2an
--- a/tools/subfix.py
+++ b/tools/subfix.py
@ -0,0 +1,544 @@
 import datetime
 import os
 import threading
 import traceback
 from dataclasses import dataclass
 from functools import partial
 from typing import List
 import click
 import gradio as gr
 import librosa
 import numpy as np
 import soundfile
 from gradio.components.audio import WaveformOptions
 from tools.i18n.i18n import I18nAuto
 # Zero-argument callable that terminates the process with exit status 0;
 # bound as the last step of the close-button chain when running as a CLI.
 PARTIAL_EXIT = partial(os._exit, 0)
 # Maps language tags found in list files to the codes offered by the language
 # dropdown. load_list() upper-cases the tag before the lookup and falls back
 # to the upper-cased tag itself when it is not listed here.
 LANGUAGE_MAP: dict = {
    "ZH": "ZH",
    "zh": "ZH",
    "JP": "JA",
    "jp": "JA",
    "JA": "JA",
    "ja": "JA",
    "EN": "EN",
    "en": "EN",
    "KO": "KO",
    "ko": "KO",
    "yue": "YUE",
    "YUE": "YUE",
 }
 # Module-wide lock taken by Subfix.change_index() and Subfix.submit().
 LOCK = threading.Lock()
 # Selects the close-button behaviour in Subfix._render_text_area():
 # True -> save and exit the process, False -> click the element "btn_close".
 IS_CLI = True
@dataclass
 class SubfixErr:
    # Error value returned (not raised) by Subfix.save_list() when writing fails.
    # error: the exception that was caught.
    error: Exception
    # tracebacks: the formatted traceback text captured where the error was caught.
    tracebacks: str
 class Subfix:
    batch_size: int = 2
    cur_idx: int = 0
    list_path: str
    textboxes: List[gr.Textbox] = []
    audios: List[gr.Audio] = []
    languages: List[gr.Dropdown] = []
    selections: List[gr.Checkbox] = []
    transcriptions_list: List[List[str]] = []
    merge_audio_button: gr.Button
    delete_audio_button: gr.Button
    previous_index_button1: gr.Button
    next_index_button1: gr.Button
    previous_index_button2: gr.Button
    next_index_button2: gr.Button
    index_slider: gr.Slider
    batch_size_slider: gr.Slider
    close_button: gr.Button
    def __init__(self, i18n: I18nAuto):
        """Create the static controls of the editor and register the dynamic area.

        Builds two rows (index slider with top paging buttons, batch-size
        slider with merge/delete buttons) and registers ``_render_text_area``
        with ``gr.render`` so it runs when the batch-size slider changes.

        Args:
            i18n: Translator used for every widget label.
        """
        self.i18n = i18n
        # Row 1: index slider plus the top "previous"/"next" buttons. The elem_ids
        # are the targets of the JS click forwarding done by the bottom buttons.
        with gr.Row(equal_height=True):
            with gr.Column(scale=2, min_width=160):
                self.index_slider = gr.Slider(minimum=0, maximum=1, step=1, label=i18n("音频索引"))
            with gr.Column(scale=1, min_width=160):
                self.previous_index_button1 = gr.Button(value=i18n("上一页"), elem_id="btn_previous")
            with gr.Column(scale=1, min_width=160):
                self.next_index_button1 = gr.Button(value=i18n("下一页"), elem_id="btn_next")
        # Row 2: rows-per-page slider plus the merge/delete buttons.
        # NOTE(review): self.batch_size is still the class default (2) here, which is
        # below the slider minimum of 4 -- confirm this initial value is intended.
        with gr.Row(equal_height=True):
            with gr.Column(scale=2, min_width=160):
                self.batch_size_slider = gr.Slider(
                    minimum=4, maximum=20, step=2, value=self.batch_size, label=i18n("每页音频条数")
                )
            with gr.Column(scale=1, min_width=160):
                self.merge_audio_button = gr.Button(value=i18n("合并选中音频"))
            with gr.Column(scale=1, min_width=160):
                self.delete_audio_button = gr.Button(value=i18n("删除选中音频"))
        # The per-row widgets are rebuilt by _render_text_area, which receives the
        # current values of both sliders and is triggered by batch-size changes only.
        gr.render(
            inputs=[self.index_slider, self.batch_size_slider],
            triggers=[self.batch_size_slider.change],
        )(self._render_text_area)
    @property
    def max_index(self):
        """Return the number of loaded transcription entries.

        This is a count, not the last index: valid positions are
        ``0 .. max_index - 1``.
        """
        return len(self.transcriptions_list)
    def load_list(self, list_path: str):
        """Parse a ``a|b|language|text`` list file into ``self.transcriptions_list``.

        Each accepted line is appended as ``[field0, field1, language, text]``
        where the language tag is upper-cased and normalised through
        ``LANGUAGE_MAP`` and the text is stripped of surrounding whitespace.
        Lines with fewer than four fields are reported on stdout and skipped;
        blank lines are skipped silently.

        Args:
            list_path: Path of the UTF-8 encoded list file to read.
        """
        # Record the target first so save_list() works even if no line parses.
        self.list_path = list_path
        with open(list_path, mode="r", encoding="utf-8") as f:
            list_data = f.readlines()
        for idx, transcriptions in enumerate(list_data):
            line = transcriptions.rstrip("\n")
            if not line.strip():
                continue
            # Split on the first three separators only: a "|" inside the text must
            # stay part of the text, otherwise the line would be rejected here and
            # then dropped from the file on the next save.
            data = line.split("|", 3)
            if len(data) != 4:
                print(f"Error Line {idx + 1}: {'|'.join(data)}")
                continue
            audio_name, audio_folder, text_language, text = data
            language_tag = text_language.upper()
            self.transcriptions_list.append(
                [
                    audio_name,
                    audio_folder,
                    LANGUAGE_MAP.get(language_tag, language_tag),
                    text.strip(),
                ]
            )
    def save_list(self):
        """Write every entry back to ``self.list_path`` as ``|``-joined lines.

        Returns:
            ``None`` on success, or a ``SubfixErr`` carrying the exception and
            its formatted traceback when opening or writing the file fails.
        """
        lines = ["|".join(entry) for entry in self.transcriptions_list]
        try:
            with open(self.list_path, mode="w", encoding="utf-8") as f:
                f.write("\n".join(lines))
        except Exception as e:
            return SubfixErr(e, traceback.format_exc())
        return None
    def change_index(self, index: int):
        """Produce widget updates that show the page starting at ``index``.

        Rows that fall past the end of the list are returned as empty,
        non-interactive placeholders. The page start is remembered in
        ``self.cur_idx``.

        Returns:
            A flat tuple: audio updates, textbox updates, dropdown updates and
            cleared checkboxes, ``self.batch_size`` of each, in that order.
        """
        audio_updates, text_updates, language_updates = [], [], []
        with LOCK:
            for position in range(index, index + self.batch_size):
                if position < self.max_index:
                    entry = self.transcriptions_list[position]
                    audio_updates.append(gr.Audio(value=entry[0]))
                    text_updates.append(gr.Textbox(value=entry[3], label=self.i18n("Text") + f" {position}"))
                    language_updates.append(gr.Dropdown(value=entry[2]))
                    continue
                # Past the end of the list: blank, read-only row.
                audio_updates.append(gr.Audio(value=None, interactive=False))
                text_updates.append(
                    gr.Textbox(value=None, label=self.i18n("Text") + f" {position}", interactive=False)
                )
                language_updates.append(gr.Dropdown(value=None, interactive=False))
            checkbox_updates = [gr.Checkbox(False) for _ in range(self.batch_size)]
        self.cur_idx = index
        return (*audio_updates, *text_updates, *language_updates, *checkbox_updates)
    def next_page(self, index: int):
        """Advance by one page, stopping at the page that ends on the last entry.

        Args:
            index: Current value of the index slider.

        Returns:
            A slider update with the new index followed by the widget updates
            from ``change_index`` for that index.
        """
        # Start of the final full page; 0 when everything fits on one page.
        # (max_index is a count, so the last page starts at count - batch_size;
        # subtracting one more would make the final entry unreachable by paging.)
        last_page_start = max(self.max_index - self.batch_size, 0)
        index = min(index + self.batch_size, last_page_start)
        return gr.Slider(value=index), *self.change_index(index)
    def previous_page(self, index: int):
        """Step back by one page, never going below index 0.

        Returns:
            A slider update with the new index followed by the widget updates
            from ``change_index`` for that index.
        """
        target = index - self.batch_size
        if target < 0:
            target = 0
        return (gr.Slider(value=target), *self.change_index(target))
    def delete_audio(self, index, *selected):
        """Remove the checked rows of the current page from the list and save.

        Args:
            index: List position of the first row on the page.
            *selected: One checkbox value per row on the page.

        Returns:
            A slider update (same value, new maximum) followed by the widget
            updates from ``change_index`` for ``index``.
        """
        delete_index = [index + offset for offset, checked in enumerate(selected) if checked]
        # max_index is a count: every position below it is a real entry, including
        # the last one. Positions at or beyond it are placeholder rows.
        delete_index = [i for i in delete_index if i < self.max_index]
        # Pop from the back so earlier positions stay valid.
        for idx in reversed(delete_index):
            self.transcriptions_list.pop(idx)
        result = self.save_list()
        if isinstance(result, SubfixErr):
            # Surface the failure the same way submit() does instead of dropping it.
            gr.Warning(str(result.error))
            print(result.tracebacks)
        return gr.Slider(value=index, maximum=self.max_index), *self.change_index(index)
    def submit(self, *input):
        """Store the page's textbox and dropdown values in the list and save it.

        Args:
            *input: All textbox values of the page followed by all dropdown
                values of the page (first half texts, second half languages).

        The values are written to the entries starting at ``self.cur_idx``.
        A failed save is reported with ``gr.Warning`` and its traceback is
        printed; nothing is raised for it.
        """
        with LOCK:
            index = self.cur_idx
            half = len(input) // 2
            texts = input[:half]
            languages = input[half:]
            # Stop at whichever comes first: the page size, the number of widgets
            # actually submitted, or the end of the list. max_index is a count, so
            # using it directly keeps the final entry editable.
            stop = min(index + self.batch_size, index + half, self.max_index)
            for idx in range(index, stop):
                text = texts[idx - index]
                language = languages[idx - index]
                if text is None or language is None:
                    # An empty widget carries no edit; keep the stored values.
                    continue
                self.transcriptions_list[idx][3] = text.strip().strip("\n")
                self.transcriptions_list[idx][2] = language
            result = self.save_list()
            if isinstance(result, SubfixErr):
                gr.Warning(str(result.error))
                print(result.tracebacks)
    def merge_audio(self, index, *selected):
        """Merge the checked rows of the current page into the first checked row.

        The audio of the checked entries is loaded as mono at 32 kHz, joined
        with 0.3 s of silence between clips and written to a new ``.wav`` file
        whose name is the first clip's path stem plus a timestamp. The first
        checked entry then points at that file and holds the texts joined by
        ``","``; the other checked entries are removed from the list.

        Args:
            index: List position of the first row on the page.
            *selected: One checkbox value per row on the page.

        Returns:
            With fewer than two valid selections: "skip" updates for the slider
            and all audio/text/language widgets plus cleared checkboxes.
            Otherwise a slider update with the new maximum followed by the
            widget updates from ``change_index`` for ``index``.
        """
        batch_size = self.batch_size
        merge_index = [index + offset for offset, checked in enumerate(selected) if checked]
        # max_index is a count, so the last real entry is max_index - 1 and must
        # remain selectable.
        merge_index = [i for i in merge_index if i < self.max_index]
        if len(merge_index) < 2:
            return *(gr.skip() for _ in range(batch_size * 3 + 1)), *(gr.Checkbox(False) for _ in range(batch_size))

        sample_rate = 32000
        first_itm_index = merge_index[0]
        first_itm_path = f"{os.path.splitext(self.transcriptions_list[first_itm_index][0])[0]}_{str(datetime.datetime.now().strftime(r'%Y%m%d_%H%M%S'))}.wav"
        merge_texts = [self.transcriptions_list[idx][3] for idx in merge_index]
        merge_audios = [self.transcriptions_list[idx][0] for idx in merge_index]

        # Do all audio work before touching the list, so a failed load or write
        # leaves the transcriptions exactly as they were.
        final_audio_list = []
        for audio_path in merge_audios:
            final_audio_list.append(librosa.load(audio_path, sr=sample_rate, mono=True)[0])
            final_audio_list.append(np.zeros(int(sample_rate * 0.3)))
        final_audio_list.pop()  # no silence after the last clip
        final_audio = np.concatenate(final_audio_list)
        soundfile.write(first_itm_path, final_audio, sample_rate)

        # Remove every merged entry except the first, back to front.
        for idx in merge_index[:0:-1]:
            self.transcriptions_list.pop(idx)
        self.transcriptions_list[first_itm_index][0] = first_itm_path
        self.transcriptions_list[first_itm_index][3] = ",".join(merge_texts)
        return gr.Slider(maximum=self.max_index), *self.change_index(index)
    def _render_text_area(self, index, batch_size):
        """Build the per-row widgets for one page and wire up all event handlers.

        Registered with ``gr.render`` in ``__init__``; receives the current
        values of the index slider and the batch-size slider.

        Args:
            index: List position of the first row to show.
            batch_size: Number of rows to build; also stored in ``self.batch_size``.

        For every position in ``[index, index + batch_size)`` one row is built
        with a textbox, an audio player, a language dropdown and a checkbox.
        Positions past the end of the list get empty, non-interactive widgets.
        Every data-changing action first runs ``submit`` and continues only if
        that succeeded.
        """
        i18n = self.i18n
        # Start from empty widget lists; they are refilled below.
        self.textboxes = []
        self.audios = []
        self.languages = []
        self.selections = []
        self.batch_size = batch_size
        for i in range(index, index + batch_size):
            with gr.Row(equal_height=True):
                if i <= self.max_index - 1:
                    # Real entry: editable text/language, playable audio.
                    with gr.Column(scale=2, min_width=160):
                        textbox_tmp = gr.Textbox(
                            value=self.transcriptions_list[i][3],
                            label=i18n("Text") + f" {i}",
                            lines=2,
                            max_lines=3,
                            interactive=True,
                        )
                    with gr.Column(scale=1, min_width=160):
                        audio_tmp = gr.Audio(
                            value=self.transcriptions_list[i][0],
                            show_label=False,
                            show_download_button=False,
                            editable=False,
                            waveform_options={"show_recording_waveform": False, "show_controls": False},
                        )
                    with gr.Column(scale=1, min_width=160):
                        with gr.Group():
                            with gr.Row():
                                language_tmp = gr.Dropdown(
                                    choices=["ZH", "EN", "JA", "KO", "YUE"],
                                    value=self.transcriptions_list[i][2],
                                    allow_custom_value=True,
                                    label=i18n("文本语言"),
                                    interactive=True,
                                )
                            with gr.Row():
                                selection_tmp = gr.Checkbox(
                                    label=i18n("选择音频"),
                                )
                else:
                    # Past the end of the list: empty, read-only placeholder row.
                    with gr.Column(scale=2, min_width=160):
                        textbox_tmp = gr.Textbox(
                            label=i18n("Text") + f" {i}",
                            lines=2,
                            max_lines=3,
                            elem_id="subfix_textbox",
                            interactive=False,
                        )
                    with gr.Column(scale=1, min_width=160):
                        audio_tmp = gr.Audio(
                            streaming=True,
                            show_label=False,
                            show_download_button=False,
                            interactive=False,
                            waveform_options=WaveformOptions(show_recording_waveform=False, show_controls=False),
                        )
                    with gr.Column(scale=1, min_width=160):
                        with gr.Group():
                            with gr.Row():
                                language_tmp = gr.Dropdown(
                                    choices=["ZH", "EN", "JA", "KO", "YUE"],
                                    value=None,
                                    allow_custom_value=True,
                                    label=i18n("文本语言"),
                                    interactive=False,
                                )
                            with gr.Row():
                                selection_tmp = gr.Checkbox(
                                    label=i18n("选择音频"),
                                    interactive=False,
                                )
            self.textboxes.append(textbox_tmp)
            self.audios.append(audio_tmp)
            self.languages.append(language_tmp)
            self.selections.append(selection_tmp)
        # Bottom row: close button and a second pair of paging buttons.
        with gr.Row(equal_height=True):
            with gr.Column(scale=2, min_width=160):
                self.close_button = gr.Button(value=i18n("关闭打标WebUI"), variant="stop")
            with gr.Column(scale=1, min_width=160):
                self.previous_index_button2 = gr.Button(value=i18n("上一页"))
            with gr.Column(scale=1, min_width=160):
                self.next_index_button2 = gr.Button(value=i18n("下一页"))
        # Event Trigger Binding
        self.index_slider.release(  # Index slider released: save the page, then jump to the chosen index
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
        ).success(
            fn=self.change_index,
            inputs=[
                self.index_slider,
            ],
            outputs=[
                *self.audios,
                *self.textboxes,
                *self.languages,
                *self.selections,
            ],
            max_batch_size=1,
            trigger_mode="once",
        )
        self.next_index_button1.click(  # Next Page Button on the Top
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
        ).success(
            fn=self.next_page,
            inputs=[
                self.index_slider,
            ],
            outputs=[
                self.index_slider,
                *self.audios,
                *self.textboxes,
                *self.languages,
                *self.selections,
            ],
            scroll_to_output=True,
            trigger_mode="once",
        )
        self.next_index_button2.click(  # Next Page Button on the Bottom, Binding to Next Page Button on the Top
            lambda: None,
            [],
            [],
            js="""
            () => {
            document.getElementById("btn_next").click();
            }""",
            trigger_mode="once",
        )
        self.previous_index_button1.click(  # Previous Page Button on the Top
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
        ).success(
            fn=self.previous_page,
            inputs=[
                self.index_slider,
            ],
            outputs=[
                self.index_slider,
                *self.audios,
                *self.textboxes,
                *self.languages,
                *self.selections,
            ],
            scroll_to_output=True,
            trigger_mode="once",
        )
        self.previous_index_button2.click(  # Previous Page Button on the Bottom, Binding to Previous Page Button on the Top
            lambda: None,
            [],
            [],
            js="""
            () => {
            document.getElementById("btn_previous").click();
            }""",
            trigger_mode="once",
        )
        # Delete: save the page, delete the checked rows, then save the refreshed page again.
        self.delete_audio_button.click(  # Delete the Audio in the Transcription File
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
        ).success(
            fn=self.delete_audio,
            inputs=[
                self.index_slider,
                *self.selections,
            ],
            outputs=[
                self.index_slider,
                *self.audios,
                *self.textboxes,
                *self.languages,
                *self.selections,
            ],
            scroll_to_output=True,
        ).success(
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
            show_progress="hidden",
        )
        # Merge: save the page, merge the checked rows, then save the refreshed page again.
        self.merge_audio_button.click(  # Merge the Selected Audios into One Entry
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
        ).success(
            fn=self.merge_audio,
            inputs=[
                self.index_slider,
                *self.selections,
            ],
            outputs=[
                self.index_slider,
                *self.audios,
                *self.textboxes,
                *self.languages,
                *self.selections,
            ],
            scroll_to_output=True,
        ).success(
            fn=self.submit,
            inputs=[
                *self.textboxes,
                *self.languages,
            ],
            outputs=[],
            show_progress="hidden",
        )
        if not IS_CLI:
            # Embedded mode: forward the click to the element with id "btn_close".
            # NOTE(review): that element is not created in this file -- presumably
            # it is provided by the hosting page; confirm against the caller.
            self.close_button.click(  # Close the Subfix Tab, Binding to Close Button on Audio Processing Tab
                fn=lambda: None,
                inputs=[],
                outputs=[],
                js="""
                () => {
                document.getElementById("btn_close").click();
                }""",
                trigger_mode="once",
            )
        else:
            # CLI mode: save the page, then terminate the process via PARTIAL_EXIT.
            self.close_button.click(  # Close the Subfix Tab, Binding to Close Button on Audio Processing Tab
                fn=self.submit,
                inputs=[
                    *self.textboxes,
                    *self.languages,
                ],
                outputs=[],
                trigger_mode="once",
            ).then(
                fn=PARTIAL_EXIT,
                inputs=[],
                outputs=[],
            )
    def render(self, list_path: str, batch_size: int = 10):
        """Reset the in-memory state and load the transcription list.

        No widgets are created here; this only stores the page size, starts
        from an empty entry list and reads ``list_path`` via ``load_list``.

        Args:
            list_path: Path of the list file to load.
            batch_size: Rows per page to store in ``self.batch_size``.
        """
        self.batch_size = batch_size
        # Bind a fresh list on the instance so entries are not appended to the
        # list object declared as the class-level default.
        self.transcriptions_list = []
        self.load_list(list_path=list_path)
# Command-line entry point: builds the Gradio app around one Subfix instance and
# launches it. The docstring of main() is shown by click as the --help text, so
# developer notes live in comments instead.
@click.command(name="subfix")
@click.argument(
    "list-path",
    metavar="<Path>",
    type=click.Path(exists=True, dir_okay=False, readable=True, writable=True),
    required=True,
)
@click.option(
    "--i18n-lang",
    type=str,
    default="Auto",
    help="Languages for internationalisation",
    show_default=True,
)
@click.option(
    "--port",
    type=int,
    default=9871,  # an int, matching the declared option type
    show_default=True,
)
@click.option(
    "--share",
    type=bool,
    default=False,
    show_default=True,
)
def main(list_path: str = "", i18n_lang="Auto", port=9871, share=False):
    """Web-Based audio subtitle editing and multilingual annotation Tool
    Accept a transcription list path to launch a Gradio WebUI for text editing
    """

    def _report_invalid_list():
        # Called with no inputs by the timer below, so it must take no arguments.
        raise gr.Error("Invalid List")

    with gr.Blocks(analytics_enabled=False) as app:
        subfix = Subfix(I18nAuto(i18n_lang))
        subfix.render(list_path=list_path)
        if subfix.max_index > 0:
            # One-shot timer: shortly after start-up it sets the slider range and
            # the batch size (whose change triggers the row rendering), then
            # deactivates itself.
            timer = gr.Timer(0.1)
            timer.tick(
                fn=lambda: (
                    gr.Slider(value=0, maximum=subfix.max_index),
                    gr.Slider(value=10),
                    gr.Timer(active=False),
                ),
                inputs=[],
                outputs=[
                    subfix.index_slider,
                    subfix.batch_size_slider,
                    timer,
                ],
            )
        else:
            # Nothing could be loaded: keep reporting the problem in the UI.
            timer = gr.Timer(2)
            timer.tick(
                fn=_report_invalid_list,
                inputs=[],
                outputs=[],
            )
        app.queue().launch(
            server_name="0.0.0.0",
            inbrowser=True,
            share=share,
            server_port=port,
            quiet=False,
        )
 if __name__ == "__main__":
    main()
--- a/tools/subfix_webui.py
+++ b/tools/subfix_webui.py
@ -1,422 +0,0 @@
 import sys
 from tools.i18n.i18n import I18nAuto, scan_language_list
 language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
 i18n = I18nAuto(language=language)
 import argparse
 import copy
 import json
 import os
 import uuid
 try:
    import gradio.analytics as analytics
    analytics.version_check = lambda: None
 except:
    ...
 import gradio as gr
 import librosa
 import numpy as np
 import soundfile
 g_json_key_text = ""
 g_json_key_path = ""
 g_load_file = ""
 g_load_format = ""
 g_max_json_index = 0
 g_index = 0
 g_batch = 10
 g_text_list = []
 g_audio_list = []
 g_checkbox_list = []
 g_data_json = []
 def reload_data(index, batch):
    global g_index
    g_index = index
    global g_batch
    g_batch = batch
    datas = g_data_json[index : index + batch]
    output = []
    for d in datas:
        output.append({g_json_key_text: d[g_json_key_text], g_json_key_path: d[g_json_key_path]})
    return output
 def b_change_index(index, batch):
    global g_index, g_batch
    g_index, g_batch = index, batch
    datas = reload_data(index, batch)
    output = []
    for i, _ in enumerate(datas):
        output.append(
            # gr.Textbox(
            #     label=f"Text {i+index}",
            #     value=_[g_json_key_text]#text
            # )
            {"__type__": "update", "label": f"Text {i + index}", "value": _[g_json_key_text]}
        )
    for _ in range(g_batch - len(datas)):
        output.append(
            # gr.Textbox(
            #     label=f"Text",
            #     value=""
            # )
            {"__type__": "update", "label": "Text", "value": ""}
        )
    for _ in datas:
        output.append(_[g_json_key_path])
    for _ in range(g_batch - len(datas)):
        output.append(None)
    for _ in range(g_batch):
        output.append(False)
    return output
 def b_next_index(index, batch):
    b_save_file()
    if (index + batch) <= g_max_json_index:
        return index + batch, *b_change_index(index + batch, batch)
    else:
        return index, *b_change_index(index, batch)
 def b_previous_index(index, batch):
    b_save_file()
    if (index - batch) >= 0:
        return index - batch, *b_change_index(index - batch, batch)
    else:
        return 0, *b_change_index(0, batch)
 def b_submit_change(*text_list):
    global g_data_json
    change = False
    for i, new_text in enumerate(text_list):
        if g_index + i <= g_max_json_index:
            new_text = new_text.strip() + " "
            if g_data_json[g_index + i][g_json_key_text] != new_text:
                g_data_json[g_index + i][g_json_key_text] = new_text
                change = True
    if change:
        b_save_file()
    return g_index, *b_change_index(g_index, g_batch)
 def b_delete_audio(*checkbox_list):
    global g_data_json, g_index, g_max_json_index
    b_save_file()
    change = False
    for i, checkbox in reversed(list(enumerate(checkbox_list))):
        if g_index + i < len(g_data_json):
            if checkbox == True:
                g_data_json.pop(g_index + i)
                change = True
    g_max_json_index = len(g_data_json) - 1
    if g_index > g_max_json_index:
        g_index = g_max_json_index
        g_index = g_index if g_index >= 0 else 0
    if change:
        b_save_file()
    # return gr.Slider(value=g_index, maximum=(g_max_json_index if g_max_json_index>=0 else 0)), *b_change_index(g_index, g_batch)
    return {
        "value": g_index,
        "__type__": "update",
        "maximum": (g_max_json_index if g_max_json_index >= 0 else 0),
    }, *b_change_index(g_index, g_batch)
 def b_invert_selection(*checkbox_list):
    """Return the flipped selection: ``False`` for values that are ``True``, ``True`` otherwise."""
    return [checked is not True for checked in checkbox_list]
 def get_next_path(filename):
    """Return an unused ``.wav`` path next to ``filename``.

    Tries ``<stem>_00.wav`` through ``<stem>_99.wav`` in the same directory
    and returns the first that does not exist; if all are taken, returns a
    path named after a random UUID instead.
    """
    folder = os.path.dirname(filename)
    stem, _ = os.path.splitext(os.path.basename(filename))
    candidates = (os.path.join(folder, f"{stem}_{str(n).zfill(2)}.wav") for n in range(100))
    unused = next((path for path in candidates if not os.path.exists(path)), None)
    if unused is not None:
        return unused
    return os.path.join(folder, f"{str(uuid.uuid4())}.wav")
 def b_audio_split(audio_breakpoint, *checkbox_list):
    global g_data_json, g_max_json_index
    checked_index = []
    for i, checkbox in enumerate(checkbox_list):
        if checkbox == True and g_index + i < len(g_data_json):
            checked_index.append(g_index + i)
    if len(checked_index) == 1:
        index = checked_index[0]
        audio_json = copy.deepcopy(g_data_json[index])
        path = audio_json[g_json_key_path]
        data, sample_rate = librosa.load(path, sr=None, mono=True)
        audio_maxframe = len(data)
        break_frame = int(audio_breakpoint * sample_rate)
        if break_frame >= 1 and break_frame < audio_maxframe:
            audio_first = data[0:break_frame]
            audio_second = data[break_frame:]
            nextpath = get_next_path(path)
            soundfile.write(nextpath, audio_second, sample_rate)
            soundfile.write(path, audio_first, sample_rate)
            g_data_json.insert(index + 1, audio_json)
            g_data_json[index + 1][g_json_key_path] = nextpath
            b_save_file()
    g_max_json_index = len(g_data_json) - 1
    # return gr.Slider(value=g_index, maximum=g_max_json_index), *b_change_index(g_index, g_batch)
    return {"value": g_index, "maximum": g_max_json_index, "__type__": "update"}, *b_change_index(g_index, g_batch)
 def b_merge_audio(interval_r, *checkbox_list):
    global g_data_json, g_max_json_index
    b_save_file()
    checked_index = []
    audios_path = []
    audios_text = []
    for i, checkbox in enumerate(checkbox_list):
        if checkbox == True and g_index + i < len(g_data_json):
            checked_index.append(g_index + i)
    if len(checked_index) > 1:
        for i in checked_index:
            audios_path.append(g_data_json[i][g_json_key_path])
            audios_text.append(g_data_json[i][g_json_key_text])
        for i in reversed(checked_index[1:]):
            g_data_json.pop(i)
        base_index = checked_index[0]
        base_path = audios_path[0]
        g_data_json[base_index][g_json_key_text] = "".join(audios_text)
        audio_list = []
        l_sample_rate = None
        for i, path in enumerate(audios_path):
            data, sample_rate = librosa.load(path, sr=l_sample_rate, mono=True)
            l_sample_rate = sample_rate
            if i > 0:
                silence = np.zeros(int(l_sample_rate * interval_r))
                audio_list.append(silence)
            audio_list.append(data)
        audio_concat = np.concatenate(audio_list)
        soundfile.write(base_path, audio_concat, l_sample_rate)
        b_save_file()
    g_max_json_index = len(g_data_json) - 1
    # return gr.Slider(value=g_index, maximum=g_max_json_index), *b_change_index(g_index, g_batch)
    return {"value": g_index, "maximum": g_max_json_index, "__type__": "update"}, *b_change_index(g_index, g_batch)
 def b_save_json():
    with open(g_load_file, "w", encoding="utf-8") as file:
        for data in g_data_json:
            file.write(f"{json.dumps(data, ensure_ascii=False)}\n")
 def b_save_list():
    with open(g_load_file, "w", encoding="utf-8") as file:
        for data in g_data_json:
            wav_path = data["wav_path"]
            speaker_name = data["speaker_name"]
            language = data["language"]
            text = data["text"]
            file.write(f"{wav_path}|{speaker_name}|{language}|{text}".strip() + "\n")
 def b_load_json():
    global g_data_json, g_max_json_index
    with open(g_load_file, "r", encoding="utf-8") as file:
        g_data_json = file.readlines()
        g_data_json = [json.loads(line) for line in g_data_json]
        g_max_json_index = len(g_data_json) - 1
 def b_load_list():
    global g_data_json, g_max_json_index
    with open(g_load_file, "r", encoding="utf-8") as source:
        data_list = source.readlines()
        for _ in data_list:
            data = _.split("|")
            if len(data) == 4:
                wav_path, speaker_name, language, text = data
                g_data_json.append(
                    {"wav_path": wav_path, "speaker_name": speaker_name, "language": language, "text": text.strip()}
                )
            else:
                print("error line:", data)
        g_max_json_index = len(g_data_json) - 1
 def b_save_file():
    if g_load_format == "json":
        b_save_json()
    elif g_load_format == "list":
        b_save_list()
 def b_load_file():
    if g_load_format == "json":
        b_load_json()
    elif g_load_format == "list":
        b_load_list()
 def set_global(load_json, load_list, json_key_text, json_key_path, batch):
    global g_json_key_text, g_json_key_path, g_load_file, g_load_format, g_batch
    g_batch = int(batch)
    if load_json != "None":
        g_load_format = "json"
        g_load_file = load_json
    elif load_list != "None":
        g_load_format = "list"
        g_load_file = load_list
    else:
        g_load_format = "list"
        g_load_file = "demo.list"
    g_json_key_text = json_key_text
    g_json_key_path = json_key_path
    b_load_file()
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process some integers.")
    parser.add_argument("--load_json", default="None", help="source file, like demo.json")
    parser.add_argument("--is_share", default="False", help="whether webui is_share=True")
    parser.add_argument("--load_list", default="None", help="source file, like demo.list")
    parser.add_argument("--webui_port_subfix", default=9871, help="source file, like demo.list")
    parser.add_argument("--json_key_text", default="text", help="the text key name in json, Default: text")
    parser.add_argument("--json_key_path", default="wav_path", help="the path key name in json, Default: wav_path")
    parser.add_argument("--g_batch", default=10, help="max number g_batch wav to display, Default: 10")
    args = parser.parse_args()
    set_global(args.load_json, args.load_list, args.json_key_text, args.json_key_path, args.g_batch)
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown(
            value=i18n("Submit Text: 将当前页所有文本框内容手工保存到内存和文件(翻页前后或者退出标注页面前如果没点这个按钮，你再翻回来就回滚了，白忙活。)")
        )
        with gr.Row():
            btn_change_index = gr.Button("Change Index")
            btn_submit_change = gr.Button("Submit Text")
            btn_merge_audio = gr.Button("Merge Audio")
            btn_delete_audio = gr.Button("Delete Audio")
            btn_previous_index = gr.Button("Previous Index")
            btn_next_index = gr.Button("Next Index")
        with gr.Row():
            index_slider = gr.Slider(minimum=0, maximum=g_max_json_index, value=g_index, step=1, label="Index", scale=3)
            splitpoint_slider = gr.Slider(
                minimum=0, maximum=120.0, value=0, step=0.1, label="Audio Split Point(s)", scale=3
            )
            btn_audio_split = gr.Button("Split Audio", scale=1)
            btn_save_json = gr.Button("Save File", visible=True, scale=1)
            btn_invert_selection = gr.Button("Invert Selection", scale=1)
        with gr.Row():
            with gr.Column():
                for _ in range(0, g_batch):
                    with gr.Row():
                        text = gr.Textbox(label="Text", visible=True, scale=5)
                        audio_output = gr.Audio(label="Output Audio", visible=True, scale=5)
                        audio_check = gr.Checkbox(label="Yes", show_label=True, info="Choose Audio", scale=1)
                        g_text_list.append(text)
                        g_audio_list.append(audio_output)
                        g_checkbox_list.append(audio_check)
        with gr.Row():
            batchsize_slider = gr.Slider(
                minimum=1, maximum=g_batch, value=g_batch, step=1, label="Batch Size", scale=3, interactive=False
            )
            interval_slider = gr.Slider(minimum=0, maximum=2, value=0, step=0.01, label="Interval", scale=3)
            btn_theme_dark = gr.Button("Light Theme", link="?__theme=light", scale=1)
            btn_theme_light = gr.Button("Dark Theme", link="?__theme=dark", scale=1)
        btn_change_index.click(
            b_change_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[*g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_submit_change.click(
            b_submit_change,
            inputs=[
                *g_text_list,
            ],
            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_previous_index.click(
            b_previous_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_next_index.click(
            b_next_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_delete_audio.click(
            b_delete_audio,
            inputs=[*g_checkbox_list],
            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_merge_audio.click(
            b_merge_audio,
            inputs=[interval_slider, *g_checkbox_list],
            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_audio_split.click(
            b_audio_split,
            inputs=[splitpoint_slider, *g_checkbox_list],
            outputs=[index_slider, *g_text_list, *g_audio_list, *g_checkbox_list],
        )
        btn_invert_selection.click(b_invert_selection, inputs=[*g_checkbox_list], outputs=[*g_checkbox_list])
        btn_save_json.click(b_save_file)
        demo.load(
            b_change_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[*g_text_list, *g_audio_list, *g_checkbox_list],
        )
    demo.launch(
        server_name="0.0.0.0",
        inbrowser=True,
        # quiet=True,
        share=eval(args.is_share),
        server_port=int(args.webui_port_subfix),
    )
--- a/tools/uvr5/webui.py
+++ b/tools/uvr5/webui.py
@ -1,23 +1,22 @@
 import logging
 import os
 import sys
 import traceback
 import gradio as gr
 from tools.i18n.i18n import I18nAuto
 from tools.my_utils import clean_path
 i18n = I18nAuto()
 logger = logging.getLogger(__name__)
 import sys
 import ffmpeg
 import gradio as gr
 import torch
 from bsroformer import Roformer_Loader
 from mdxnet import MDXNetDereverb
 from vr import AudioPre, AudioPreDeEcho
 from tools.i18n.i18n import I18nAuto
 from tools.my_utils import clean_path, load_cudnn
 i18n = I18nAuto()
 logger = logging.getLogger(__name__)
 weight_uvr5_root = "tools/uvr5/uvr5_weights"
 uvr5_names = []
 for name in os.listdir(weight_uvr5_root):
@ -44,6 +43,7 @@ def html_center(text, label="p"):
 def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
    infos = []
    load_cudnn()
    try:
        inp_root = clean_path(inp_root)
        save_root_vocal = clean_path(save_root_vocal)
--- a/webui.py
+++ b/webui.py
@ -58,6 +58,7 @@ for site_packages_root in site_packages_roots:
            traceback.print_exc()
 import shutil
 import subprocess
 from multiprocessing import cpu_count
 from subprocess import Popen
 from tools.assets import css, js, top_html
@ -86,14 +87,9 @@ from config import (
 from tools import my_utils
 from tools.my_utils import check_details, check_for_existance
-# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
+language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
-try:
+os.environ["language"] = language
-    import gradio.analytics as analytics
+i18n = I18nAuto(language=language)
    analytics.version_check = lambda: None
 except:
    ...
 import gradio as gr
 n_cpu = cpu_count()
@ -276,12 +272,7 @@ def change_label(path_list):
    if p_label is None:
        check_for_existance([path_list])
        path_list = my_utils.clean_path(path_list)
-        cmd = '"%s" -s tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % (
+        cmd = f'"{python_exec}" -s tools/subfix.py --i18n-lang {language} --port {webui_port_subfix} --share {is_share} "{path_list}"'
            python_exec,
            path_list,
            webui_port_subfix,
            is_share,
        )
        yield (
            process_info(process_name_subfix, "opened"),
            {"__type__": "update", "visible": False},