mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-12-17 01:59:08 +08:00
Add new subfix webui, fix bugs in requirements
This commit is contained in:
parent
ed89a02337
commit
6a2ab63e18
@ -23,8 +23,9 @@ from .utils import load_config
|
||||
onnxruntime.set_default_logger_severity(3)
|
||||
try:
|
||||
onnxruntime.preload_dlls()
|
||||
except:pass
|
||||
#traceback.print_exc()
|
||||
except:
|
||||
pass
|
||||
# traceback.print_exc()
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
model_version = "1.1"
|
||||
|
||||
@ -8,7 +8,7 @@ pytorch-lightning>=2.4
|
||||
gradio<5
|
||||
ffmpeg-python
|
||||
onnxruntime; platform_machine == "aarch64" or platform_machine == "arm64"
|
||||
onnxruntime-gpu; platform_machine == "x86_64" or platform_machine == "AMD64"
|
||||
onnxruntime-gpu; platform_machine == "x86_64" or platform_machine == "amd64"
|
||||
tqdm
|
||||
funasr==1.0.27
|
||||
cn2an
|
||||
|
||||
544
tools/subfix.py
Normal file
544
tools/subfix.py
Normal file
@ -0,0 +1,544 @@
|
||||
import datetime
|
||||
import os
|
||||
import threading
|
||||
import traceback
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
from typing import List
|
||||
|
||||
import click
|
||||
import gradio as gr
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile
|
||||
from gradio.components.audio import WaveformOptions
|
||||
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
|
||||
# Zero-argument callable that terminates the interpreter immediately with
# exit status 0 (os._exit skips normal interpreter shutdown).
PARTIAL_EXIT = partial(os._exit, 0)

# Maps every accepted spelling of a language tag to its canonical upper-case
# code.  Built from (canonical, aliases) pairs; insertion order is preserved.
LANGUAGE_MAP: dict = {
    alias: canonical
    for canonical, aliases in (
        ("ZH", ("ZH", "zh")),
        ("JA", ("JP", "jp", "JA", "ja")),
        ("EN", ("EN", "en")),
        ("KO", ("KO", "ko")),
        ("YUE", ("yue", "YUE")),
    )
    for alias in aliases
}

# Serialises access to the shared transcription state across event handlers.
LOCK = threading.Lock()

# True when the tool runs as a standalone command-line app.
IS_CLI = True
|
||||
|
||||
|
||||
@dataclass
class SubfixErr:
    """Error value returned (not raised) when saving the list file fails."""

    # The exception that was caught.
    error: Exception
    # Formatted traceback text captured at the point of failure.
    tracebacks: str
|
||||
|
||||
|
||||
class Subfix:
    """Gradio widget set for proofreading a ``path|speaker|language|text`` list.

    Constructing an instance inside a ``gr.Blocks`` context creates the static
    controls (index slider, paging buttons, batch-size slider, merge/delete
    buttons) and registers ``_render_text_area`` with ``gr.render`` so the
    per-row editors are rebuilt whenever the batch-size slider changes.
    ``render`` loads the list file that the widgets edit.
    """

    # Rows shown per page.
    # NOTE(review): the default (2) is below the batch-size slider minimum (4);
    # presumably this guarantees that the first programmatic update of that
    # slider counts as a change and triggers the initial render - confirm
    # before "fixing" it.
    batch_size: int = 2
    # Index of the first row currently displayed.
    cur_idx: int = 0
    list_path: str

    # Per-row components of the current page (rebuilt by _render_text_area).
    textboxes: "List[gr.Textbox]"
    audios: "List[gr.Audio]"
    languages: "List[gr.Dropdown]"
    selections: "List[gr.Checkbox]"
    # One [audio_path, speaker, language, text] list per entry of the list file.
    transcriptions_list: "List[List[str]]"

    merge_audio_button: "gr.Button"
    delete_audio_button: "gr.Button"
    previous_index_button1: "gr.Button"
    next_index_button1: "gr.Button"
    previous_index_button2: "gr.Button"
    next_index_button2: "gr.Button"
    index_slider: "gr.Slider"
    batch_size_slider: "gr.Slider"
    close_button: "gr.Button"

    def __init__(self, i18n: "I18nAuto"):
        """Create the static controls; must be called inside a Blocks context.

        Args:
            i18n: Callable translating UI label keys.
        """
        self.i18n = i18n
        # Per-instance containers: class-level list defaults would be shared
        # by every Subfix instance.
        self.textboxes = []
        self.audios = []
        self.languages = []
        self.selections = []
        self.transcriptions_list = []

        with gr.Row(equal_height=True):
            with gr.Column(scale=2, min_width=160):
                self.index_slider = gr.Slider(minimum=0, maximum=1, step=1, label=i18n("音频索引"))
            with gr.Column(scale=1, min_width=160):
                self.previous_index_button1 = gr.Button(value=i18n("上一页"), elem_id="btn_previous")
            with gr.Column(scale=1, min_width=160):
                self.next_index_button1 = gr.Button(value=i18n("下一页"), elem_id="btn_next")
        with gr.Row(equal_height=True):
            with gr.Column(scale=2, min_width=160):
                self.batch_size_slider = gr.Slider(
                    minimum=4, maximum=20, step=2, value=self.batch_size, label=i18n("每页音频条数")
                )
            with gr.Column(scale=1, min_width=160):
                self.merge_audio_button = gr.Button(value=i18n("合并选中音频"))
            with gr.Column(scale=1, min_width=160):
                self.delete_audio_button = gr.Button(value=i18n("删除选中音频"))
        gr.render(
            inputs=[self.index_slider, self.batch_size_slider],
            triggers=[self.batch_size_slider.change],
        )(self._render_text_area)

    @property
    def max_index(self):
        """Number of transcription entries currently loaded."""
        return len(self.transcriptions_list)

    def _selected_indices(self, index, selected):
        """Return absolute indices of ticked rows that refer to existing entries.

        ``selected`` holds one checkbox value per displayed row; row ``k``
        corresponds to entry ``index + k``.  Every existing entry, including
        the last one, is eligible.
        """
        index = int(index)
        return [
            index + offset
            for offset, checked in enumerate(selected)
            if checked and index + offset < self.max_index
        ]

    def _last_page_start(self):
        """Return the largest start index whose page still ends on the last entry."""
        return max(self.max_index - self.batch_size, 0)

    @staticmethod
    def _report(result):
        """Surface a SubfixErr returned by save_list as a UI warning plus log."""
        if isinstance(result, SubfixErr):
            gr.Warning(str(result.error))
            print(result.tracebacks)

    def load_list(self, list_path: str):
        """Append the entries of ``list_path`` to ``transcriptions_list``.

        Each line must consist of exactly four ``|``-separated fields; other
        lines are reported on stdout and skipped.  The language field is
        upper-cased and normalised through ``LANGUAGE_MAP``.
        """
        with open(list_path, mode="r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split("|")
                if len(fields) != 4:
                    print(f"Error Line {line_no}: {'|'.join(fields)}")
                    continue
                audio_name, audio_folder, text_language, text = fields
                language = text_language.upper()
                self.transcriptions_list.append(
                    [
                        audio_name,
                        audio_folder,
                        LANGUAGE_MAP.get(language, language),
                        text.strip("\n").strip(),
                    ]
                )
        self.list_path = list_path

    def save_list(self):
        """Write all entries back to ``self.list_path``.

        Returns:
            ``None`` on success, or a ``SubfixErr`` describing the failure.
        """
        try:
            # Serialise before opening the file so that a malformed row cannot
            # leave a truncated list file behind.
            content = "\n".join("|".join(row) for row in self.transcriptions_list)
            with open(self.list_path, mode="w", encoding="utf-8") as f:
                f.write(content)
        except Exception as e:
            return SubfixErr(e, traceback.format_exc())

    def change_index(self, index: int):
        """Build component updates showing the page that starts at ``index``.

        Returns:
            A flat tuple: audio updates, text updates, language updates and
            checkbox updates, ``batch_size`` of each.  Rows past the end of
            the list are cleared and made non-interactive.
        """
        index = int(index)
        audios, texts, languages, checkboxes = [], [], [], []
        with LOCK:
            total = self.max_index
            for i in range(index, index + self.batch_size):
                label = self.i18n("Text") + f" {i}"
                if i < total:
                    row = self.transcriptions_list[i]
                    audios.append(gr.Audio(value=row[0]))
                    # Re-enable explicitly: the row may have been disabled
                    # while it was displaying an out-of-range slot.
                    texts.append(gr.Textbox(value=row[3], label=label, interactive=True))
                    languages.append(gr.Dropdown(value=row[2], interactive=True))
                    checkboxes.append(gr.Checkbox(False, interactive=True))
                else:
                    audios.append(gr.Audio(value=None, interactive=False))
                    texts.append(gr.Textbox(value=None, label=label, interactive=False))
                    languages.append(gr.Dropdown(value=None, interactive=False))
                    checkboxes.append(gr.Checkbox(False, interactive=False))
            self.cur_idx = index
        return *audios, *texts, *languages, *checkboxes

    def next_page(self, index: int):
        """Advance one page, stopping at the page that ends on the last entry."""
        index = min(int(index) + self.batch_size, self._last_page_start())
        return gr.Slider(value=index), *self.change_index(index)

    def previous_page(self, index: int):
        """Go back one page, never before index 0."""
        index = max(int(index) - self.batch_size, 0)
        return gr.Slider(value=index), *self.change_index(index)

    def delete_audio(self, index, *selected):
        """Remove the ticked entries from the list, save it and refresh the page."""
        # Pop from the back so the remaining indices stay valid.
        for idx in reversed(self._selected_indices(index, selected)):
            self.transcriptions_list.pop(idx)
        self._report(self.save_list())
        # Keep the slider value inside its (possibly smaller) new range.
        index = min(int(index), self.max_index)
        return gr.Slider(value=index, maximum=self.max_index), *self.change_index(index)

    def submit(self, *inputs):
        """Store the edited texts and languages of the current page and save.

        ``inputs`` is the flat concatenation of all textbox values followed by
        all language dropdown values of the displayed rows.
        """
        with LOCK:
            index = self.cur_idx
            half = len(inputs) // 2
            texts = inputs[:half]
            languages = inputs[half:]
            # Only rows that map to existing entries (including the last one).
            row_count = min(half, self.batch_size, self.max_index - index)
            for offset in range(max(row_count, 0)):
                entry = self.transcriptions_list[index + offset]
                entry[3] = texts[offset].strip().strip("\n")
                entry[2] = languages[offset]
            self._report(self.save_list())

    def merge_audio(self, index, *selected):
        """Merge the ticked entries into the first ticked one.

        The audio files are resampled to 32 kHz mono, joined with 0.3 s of
        silence between clips and written to a new timestamped ``.wav`` next
        to the first file; the texts are joined with ``,``.  Fewer than two
        valid selections leave everything untouched except the checkboxes,
        which are cleared.
        """
        batch_size = self.batch_size
        merge_index = self._selected_indices(index, selected)
        if len(merge_index) < 2:
            return (
                *(gr.skip() for _ in range(batch_size * 3 + 1)),
                *(gr.Checkbox(False) for _ in range(batch_size)),
            )

        first_itm_index = merge_index[0]
        rows = [self.transcriptions_list[idx] for idx in merge_index]
        stamp = datetime.datetime.now().strftime(r"%Y%m%d_%H%M%S")
        merged_path = f"{os.path.splitext(rows[0][0])[0]}_{stamp}.wav"

        # Do all audio I/O first: if loading or writing fails, the in-memory
        # transcription list has not been modified yet.
        pieces = []
        for row in rows:
            pieces.append(librosa.load(row[0], sr=32000, mono=True)[0])
            pieces.append(np.zeros(int(32000 * 0.3)))
        pieces.pop()  # no trailing silence after the last clip
        soundfile.write(merged_path, np.concatenate(pieces), 32000)

        merged_text = ",".join(row[3] for row in rows)
        for idx in merge_index[:0:-1]:
            self.transcriptions_list.pop(idx)
        self.transcriptions_list[first_itm_index][0] = merged_path
        self.transcriptions_list[first_itm_index][3] = merged_text
        return gr.Slider(maximum=self.max_index), *self.change_index(index)

    def _render_text_area(self, index, batch_size):
        """(Re)build the per-row editors and bind every event handler.

        Invoked through ``gr.render`` with the current index and batch-size
        slider values.
        """
        i18n = self.i18n
        index = int(index)
        batch_size = int(batch_size)
        self.textboxes = []
        self.audios = []
        self.languages = []
        self.selections = []
        self.batch_size = batch_size
        # The rows built below start at `index`; submit() must write to them.
        self.cur_idx = index

        for i in range(index, index + batch_size):
            if i < self.max_index:
                row = self.transcriptions_list[i]
                text_kwargs = {"value": row[3], "interactive": True}
                audio_kwargs = {
                    "value": row[0],
                    "editable": False,
                    "waveform_options": {"show_recording_waveform": False, "show_controls": False},
                }
                language_kwargs = {"value": row[2], "interactive": True}
                selection_kwargs = {}
            else:
                # Placeholder row past the end of the list.
                text_kwargs = {"elem_id": "subfix_textbox", "interactive": False}
                audio_kwargs = {
                    "streaming": True,
                    "interactive": False,
                    "waveform_options": WaveformOptions(show_recording_waveform=False, show_controls=False),
                }
                language_kwargs = {"value": None, "interactive": False}
                selection_kwargs = {"interactive": False}

            with gr.Row(equal_height=True):
                with gr.Column(scale=2, min_width=160):
                    textbox_tmp = gr.Textbox(
                        label=i18n("Text") + f" {i}",
                        lines=2,
                        max_lines=3,
                        **text_kwargs,
                    )
                with gr.Column(scale=1, min_width=160):
                    audio_tmp = gr.Audio(
                        show_label=False,
                        show_download_button=False,
                        **audio_kwargs,
                    )
                with gr.Column(scale=1, min_width=160):
                    with gr.Group():
                        with gr.Row():
                            language_tmp = gr.Dropdown(
                                choices=["ZH", "EN", "JA", "KO", "YUE"],
                                allow_custom_value=True,
                                label=i18n("文本语言"),
                                **language_kwargs,
                            )
                        with gr.Row():
                            selection_tmp = gr.Checkbox(
                                label=i18n("选择音频"),
                                **selection_kwargs,
                            )

            self.textboxes.append(textbox_tmp)
            self.audios.append(audio_tmp)
            self.languages.append(language_tmp)
            self.selections.append(selection_tmp)

        with gr.Row(equal_height=True):
            with gr.Column(scale=2, min_width=160):
                self.close_button = gr.Button(value=i18n("关闭打标WebUI"), variant="stop")
            with gr.Column(scale=1, min_width=160):
                self.previous_index_button2 = gr.Button(value=i18n("上一页"))
            with gr.Column(scale=1, min_width=160):
                self.next_index_button2 = gr.Button(value=i18n("下一页"))

        # ---- Event bindings -------------------------------------------------
        def edit_inputs():
            # Fresh list per binding: textbox values followed by language values.
            return [*self.textboxes, *self.languages]

        def page_outputs():
            return [*self.audios, *self.textboxes, *self.languages, *self.selections]

        # Index slider released: save the current page, then show the new one.
        self.index_slider.release(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
        ).success(
            fn=self.change_index,
            inputs=[self.index_slider],
            outputs=page_outputs(),
            max_batch_size=1,
            trigger_mode="once",
        )

        # Top "next page" button.
        self.next_index_button1.click(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
        ).success(
            fn=self.next_page,
            inputs=[self.index_slider],
            outputs=[self.index_slider, *page_outputs()],
            scroll_to_output=True,
            trigger_mode="once",
        )

        # Bottom "next page" button: forwards the click to the top button.
        self.next_index_button2.click(
            lambda: None,
            [],
            [],
            js="""
            () => {
                document.getElementById("btn_next").click();
            }""",
            trigger_mode="once",
        )

        # Top "previous page" button.
        self.previous_index_button1.click(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
        ).success(
            fn=self.previous_page,
            inputs=[self.index_slider],
            outputs=[self.index_slider, *page_outputs()],
            scroll_to_output=True,
            trigger_mode="once",
        )

        # Bottom "previous page" button: forwards the click to the top button.
        self.previous_index_button2.click(
            lambda: None,
            [],
            [],
            js="""
            () => {
                document.getElementById("btn_previous").click();
            }""",
            trigger_mode="once",
        )

        # Delete the selected entries: save edits, delete, save the new page.
        self.delete_audio_button.click(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
        ).success(
            fn=self.delete_audio,
            inputs=[self.index_slider, *self.selections],
            outputs=[self.index_slider, *page_outputs()],
            scroll_to_output=True,
        ).success(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
            show_progress="hidden",
        )

        # Merge the selected entries: save edits, merge, save the new page.
        self.merge_audio_button.click(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
        ).success(
            fn=self.merge_audio,
            inputs=[self.index_slider, *self.selections],
            outputs=[self.index_slider, *page_outputs()],
            scroll_to_output=True,
        ).success(
            fn=self.submit,
            inputs=edit_inputs(),
            outputs=[],
            show_progress="hidden",
        )

        if not IS_CLI:
            # Embedded mode: forward the click to the element with id "btn_close".
            self.close_button.click(
                fn=lambda: None,
                inputs=[],
                outputs=[],
                js="""
                () => {
                    document.getElementById("btn_close").click();
                }""",
                trigger_mode="once",
            )
        else:
            # Standalone mode: save the current page, then terminate the process.
            self.close_button.click(
                fn=self.submit,
                inputs=edit_inputs(),
                outputs=[],
                trigger_mode="once",
            ).then(
                fn=PARTIAL_EXIT,
                inputs=[],
                outputs=[],
            )

    def render(self, list_path: str, batch_size: int = 10):
        """Reset the transcription list and load it from ``list_path``.

        Args:
            list_path: Path of the list file to edit.
            batch_size: Number of rows per page to start with.
        """
        self.batch_size = batch_size
        self.transcriptions_list = []
        self.load_list(list_path=list_path)
|
||||
|
||||
|
||||
@click.command(name="subfix")
@click.argument(
    "list-path",
    metavar="<Path>",
    type=click.Path(exists=True, dir_okay=False, readable=True, writable=True),
    required=True,
)
@click.option(
    "--i18n-lang",
    type=str,
    default="Auto",
    help="Languages for internationalisation",
    show_default=True,
)
@click.option(
    "--port",
    type=int,
    default=9871,
    show_default=True,
)
@click.option(
    "--share",
    type=bool,
    default=False,
    show_default=True,
)
def main(list_path: str = "", i18n_lang="Auto", port=9871, share=False):
    """Web-Based audio subtitle editing and multilingual annotation Tool

    Accept a transcription list path to launch a Gradio WebUI for text editing
    """
    # NOTE: click uses the docstring above as the command's --help text, so it
    # is kept verbatim; implementation notes live in comments instead.

    def report_invalid_list():
        # Takes no parameters because the tick event is bound with inputs=[].
        raise gr.Error("Invalid List")

    with gr.Blocks(analytics_enabled=False) as app:
        subfix = Subfix(I18nAuto(i18n_lang))
        subfix.render(list_path=list_path)

        if subfix.max_index > 0:
            # One-shot timer: shortly after page load, set the index range and
            # the batch size (the batch-size change triggers the first render),
            # then switch the timer off.
            timer = gr.Timer(0.1)
            timer.tick(
                fn=lambda: (
                    gr.Slider(value=0, maximum=subfix.max_index),
                    gr.Slider(value=10),
                    gr.Timer(active=False),
                ),
                inputs=[],
                outputs=[
                    subfix.index_slider,
                    subfix.batch_size_slider,
                    timer,
                ],
            )
        else:
            # Nothing could be loaded from the list: keep reporting the error.
            timer = gr.Timer(2)
            timer.tick(
                fn=report_invalid_list,
                inputs=[],
                outputs=[],
            )

    app.queue().launch(
        server_name="0.0.0.0",
        inbrowser=True,
        share=share,
        server_port=port,
        quiet=False,
    )


if __name__ == "__main__":
    main()
|
||||
@ -1,422 +0,0 @@
|
||||
import sys
|
||||
from tools.i18n.i18n import I18nAuto, scan_language_list
|
||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
|
||||
i18n = I18nAuto(language=language)
|
||||
import argparse
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
|
||||
# Best effort: replace gradio's analytics version check with a no-op.
# Failure to import or patch is deliberately ignored, but only ordinary
# exceptions are swallowed - KeyboardInterrupt/SystemExit still propagate.
try:
    import gradio.analytics as analytics

    analytics.version_check = lambda: None
except Exception:
    pass
|
||||
|
||||
import gradio as gr
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile
|
||||
|
||||
g_json_key_text = ""
|
||||
g_json_key_path = ""
|
||||
g_load_file = ""
|
||||
g_load_format = ""
|
||||
|
||||
g_max_json_index = 0
|
||||
g_index = 0
|
||||
g_batch = 10
|
||||
g_text_list = []
|
||||
g_audio_list = []
|
||||
g_checkbox_list = []
|
||||
g_data_json = []
|
||||
|
||||
|
||||
def reload_data(index, batch):
    """Record the current page position and return that page's entries.

    Stores ``index``/``batch`` in ``g_index``/``g_batch`` and returns one dict
    per entry in ``g_data_json[index:index + batch]`` reduced to the
    configured text and path keys.
    """
    global g_index, g_batch
    g_index = index
    g_batch = batch
    return [
        {g_json_key_text: entry[g_json_key_text], g_json_key_path: entry[g_json_key_path]}
        for entry in g_data_json[index : index + batch]
    ]
|
||||
|
||||
|
||||
def b_change_index(index, batch):
    """Return the output values that display the page starting at ``index``.

    The returned list holds, in order: one textbox update per slot, one audio
    path (or ``None``) per slot and one ``False`` per checkbox.  Slots past
    the available data get an empty text and ``None`` audio.
    """
    global g_index, g_batch
    g_index, g_batch = index, batch
    rows = reload_data(index, batch)
    unused_slots = g_batch - len(rows)

    text_updates = [
        {"__type__": "update", "label": f"Text {offset + index}", "value": row[g_json_key_text]}
        for offset, row in enumerate(rows)
    ]
    text_updates += [{"__type__": "update", "label": "Text", "value": ""} for _ in range(unused_slots)]
    audio_values = [row[g_json_key_path] for row in rows] + [None] * unused_slots
    checkbox_values = [False] * g_batch
    return [*text_updates, *audio_values, *checkbox_values]
|
||||
|
||||
|
||||
def b_next_index(index, batch):
    """Save, then move one page forward if that start index still exists.

    Returns the new slider index followed by the page outputs.
    """
    b_save_file()
    target = index + batch if (index + batch) <= g_max_json_index else index
    return target, *b_change_index(target, batch)
|
||||
|
||||
|
||||
def b_previous_index(index, batch):
    """Save, then move one page back, stopping at index 0.

    Returns the new slider index followed by the page outputs.
    """
    b_save_file()
    target = index - batch if (index - batch) >= 0 else 0
    return target, *b_change_index(target, batch)
|
||||
|
||||
|
||||
def b_submit_change(*text_list):
    """Store the edited texts of the current page; save only if one changed.

    Each text is stripped and given a single trailing space before it is
    compared with, and written to, the stored entry.
    """
    global g_data_json
    dirty = False
    for offset, raw_text in enumerate(text_list):
        position = g_index + offset
        if position > g_max_json_index:
            continue
        normalized = raw_text.strip() + " "
        entry = g_data_json[position]
        if entry[g_json_key_text] != normalized:
            entry[g_json_key_text] = normalized
            dirty = True
    if dirty:
        b_save_file()
    return g_index, *b_change_index(g_index, g_batch)
|
||||
|
||||
|
||||
def b_delete_audio(*checkbox_list):
    """Delete the ticked entries of the current page and refresh the view.

    Saves before deleting, and again afterwards if anything was removed.
    Returns a slider update (clamped index, new maximum) followed by the page
    outputs.
    """
    global g_data_json, g_index, g_max_json_index
    b_save_file()

    doomed = [
        g_index + offset
        for offset, flag in enumerate(checkbox_list)
        if g_index + offset < len(g_data_json) and flag == True
    ]
    # Highest index first so earlier positions are not shifted.
    for position in reversed(doomed):
        g_data_json.pop(position)

    g_max_json_index = len(g_data_json) - 1
    if g_index > g_max_json_index:
        g_index = g_max_json_index
    if g_index < 0:
        g_index = 0

    if doomed:
        b_save_file()

    slider_update = {
        "value": g_index,
        "__type__": "update",
        "maximum": (g_max_json_index if g_max_json_index >= 0 else 0),
    }
    return slider_update, *b_change_index(g_index, g_batch)
|
||||
|
||||
|
||||
def b_invert_selection(*checkbox_list):
    """Return the inverted checkbox states.

    Exactly ``True`` becomes ``False``; every other value becomes ``True``.
    """
    return [item is not True for item in checkbox_list]
|
||||
|
||||
|
||||
def get_next_path(filename):
    """Return an unused ``.wav`` path next to ``filename``.

    Tries ``<stem>_00.wav`` through ``<stem>_99.wav`` in the same directory
    and returns the first that does not exist; if all exist, returns a
    UUID-named ``.wav`` in that directory.
    """
    folder = os.path.dirname(filename)
    stem = os.path.splitext(os.path.basename(filename))[0]
    candidates = (os.path.join(folder, f"{stem}_{number:02d}.wav") for number in range(100))
    free_path = next((path for path in candidates if not os.path.exists(path)), None)
    if free_path is not None:
        return free_path
    return os.path.join(folder, f"{uuid.uuid4()}.wav")
|
||||
|
||||
|
||||
def b_audio_split(audio_breakpoint, *checkbox_list):
    """Split the single ticked audio at ``audio_breakpoint`` seconds.

    Acts only when exactly one existing entry is ticked and the break point
    lies strictly inside the audio.  The tail is written to a new file, the
    original file is overwritten with the head, and a copy of the entry that
    points at the tail is inserted right after the original.  Always returns
    a slider update followed by the page outputs.
    """
    global g_data_json, g_max_json_index
    ticked = [
        g_index + offset
        for offset, flag in enumerate(checkbox_list)
        if flag == True and g_index + offset < len(g_data_json)
    ]
    if len(ticked) == 1:
        position = ticked[0]
        clone = copy.deepcopy(g_data_json[position])
        source_path = clone[g_json_key_path]
        samples, rate = librosa.load(source_path, sr=None, mono=True)
        cut = int(audio_breakpoint * rate)

        if 1 <= cut < len(samples):
            tail_path = get_next_path(source_path)
            soundfile.write(tail_path, samples[cut:], rate)
            soundfile.write(source_path, samples[0:cut], rate)
            g_data_json.insert(position + 1, clone)
            g_data_json[position + 1][g_json_key_path] = tail_path
            b_save_file()

    g_max_json_index = len(g_data_json) - 1
    slider_update = {"value": g_index, "maximum": g_max_json_index, "__type__": "update"}
    return slider_update, *b_change_index(g_index, g_batch)
|
||||
|
||||
|
||||
def b_merge_audio(interval_r, *checkbox_list):
    """Merge the ticked entries of the current page into the first ticked one.

    The audio clips are concatenated with ``interval_r`` seconds of silence
    between them (later clips are loaded at the first clip's sample rate) and
    written over the first clip's file; the texts are concatenated.  Nothing
    happens unless at least two existing entries are ticked.

    Returns a slider update followed by the page outputs.
    """
    global g_data_json, g_max_json_index
    b_save_file()
    checked_index = [
        g_index + i
        for i, checkbox in enumerate(checkbox_list)
        if checkbox == True and g_index + i < len(g_data_json)
    ]

    if len(checked_index) > 1:
        audios_path = [g_data_json[i][g_json_key_path] for i in checked_index]
        audios_text = [g_data_json[i][g_json_key_text] for i in checked_index]

        # Build and write the merged audio BEFORE touching g_data_json, so a
        # failed load/write cannot leave the entry list half-modified.
        audio_list = []
        l_sample_rate = None
        for i, path in enumerate(audios_path):
            data, sample_rate = librosa.load(path, sr=l_sample_rate, mono=True)
            l_sample_rate = sample_rate
            if i > 0:
                audio_list.append(np.zeros(int(l_sample_rate * interval_r)))
            audio_list.append(data)
        soundfile.write(audios_path[0], np.concatenate(audio_list), l_sample_rate)

        # Drop the merged-away entries from the back so indices stay valid.
        for i in reversed(checked_index[1:]):
            g_data_json.pop(i)
        g_data_json[checked_index[0]][g_json_key_text] = "".join(audios_text)
        b_save_file()

    g_max_json_index = len(g_data_json) - 1
    return {"value": g_index, "maximum": g_max_json_index, "__type__": "update"}, *b_change_index(g_index, g_batch)
|
||||
|
||||
|
||||
def b_save_json():
    """Write ``g_data_json`` to ``g_load_file`` as one JSON object per line."""
    with open(g_load_file, "w", encoding="utf-8") as out:
        for entry in g_data_json:
            serialized = json.dumps(entry, ensure_ascii=False)
            out.write(f"{serialized}\n")
|
||||
|
||||
|
||||
def b_save_list():
    """Write ``g_data_json`` to ``g_load_file`` in ``path|speaker|language|text`` form.

    Each assembled line is stripped of surrounding whitespace before the
    newline is appended.
    """
    with open(g_load_file, "w", encoding="utf-8") as out:
        for entry in g_data_json:
            line = "{wav_path}|{speaker_name}|{language}|{text}".format(**entry)
            out.write(line.strip() + "\n")
|
||||
|
||||
|
||||
def b_load_json():
    """Load ``g_load_file`` (one JSON object per line) into ``g_data_json``."""
    global g_data_json, g_max_json_index
    with open(g_load_file, "r", encoding="utf-8") as source:
        g_data_json = [json.loads(raw_line) for raw_line in source.readlines()]
        g_max_json_index = len(g_data_json) - 1
|
||||
|
||||
|
||||
def b_load_list():
    """Append the entries of the ``|``-separated ``g_load_file`` to ``g_data_json``.

    Lines that do not split into exactly four fields are printed and skipped.
    """
    global g_data_json, g_max_json_index
    with open(g_load_file, "r", encoding="utf-8") as source:
        for raw_line in source.readlines():
            fields = raw_line.split("|")
            if len(fields) != 4:
                print("error line:", fields)
                continue
            wav_path, speaker_name, language, text = fields
            g_data_json.append(
                {"wav_path": wav_path, "speaker_name": speaker_name, "language": language, "text": text.strip()}
            )
        g_max_json_index = len(g_data_json) - 1
|
||||
|
||||
|
||||
def b_save_file():
    """Save the data in the format it was loaded from; unknown formats are ignored."""
    writer = {"json": b_save_json, "list": b_save_list}.get(g_load_format)
    if writer is not None:
        writer()
|
||||
|
||||
|
||||
def b_load_file():
    """Load the data using the loader for ``g_load_format``; unknown formats are ignored."""
    loader = {"json": b_load_json, "list": b_load_list}.get(g_load_format)
    if loader is not None:
        loader()
|
||||
|
||||
|
||||
def set_global(load_json, load_list, json_key_text, json_key_path, batch):
    """Initialise the module-level configuration and load the source file.

    The string ``"None"`` means "not given".  A JSON source takes precedence
    over a list source; when neither is given, ``demo.list`` is loaded as a
    list file.
    """
    global g_json_key_text, g_json_key_path, g_load_file, g_load_format, g_batch

    g_batch = int(batch)

    if load_json != "None":
        g_load_format, g_load_file = "json", load_json
    elif load_list != "None":
        g_load_format, g_load_file = "list", load_list
    else:
        g_load_format, g_load_file = "list", "demo.list"

    g_json_key_text, g_json_key_path = json_key_text, json_key_path

    b_load_file()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse options, load the data file, build the
    # proofreading UI and launch it.
    parser = argparse.ArgumentParser(description="Process some integers.")
    parser.add_argument("--load_json", default="None", help="source file, like demo.json")
    parser.add_argument("--is_share", default="False", help="whether webui is_share=True")
    parser.add_argument("--load_list", default="None", help="source file, like demo.list")
    parser.add_argument("--webui_port_subfix", default=9871, help="port of the subfix webui, Default: 9871")
    parser.add_argument("--json_key_text", default="text", help="the text key name in json, Default: text")
    parser.add_argument("--json_key_path", default="wav_path", help="the path key name in json, Default: wav_path")
    parser.add_argument("--g_batch", default=10, help="max number g_batch wav to display, Default: 10")

    args = parser.parse_args()

    # Interpret --is_share as a boolean without eval(): evaluating a
    # command-line string would execute arbitrary code.
    is_share = str(args.is_share).strip().lower() in ("true", "1", "yes")

    set_global(args.load_json, args.load_list, args.json_key_text, args.json_key_path, args.g_batch)

    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown(
            value=i18n("Submit Text: 将当前页所有文本框内容手工保存到内存和文件(翻页前后或者退出标注页面前如果没点这个按钮,你再翻回来就回滚了,白忙活。)")
        )
        with gr.Row():
            btn_change_index = gr.Button("Change Index")
            btn_submit_change = gr.Button("Submit Text")
            btn_merge_audio = gr.Button("Merge Audio")
            btn_delete_audio = gr.Button("Delete Audio")
            btn_previous_index = gr.Button("Previous Index")
            btn_next_index = gr.Button("Next Index")

        with gr.Row():
            index_slider = gr.Slider(minimum=0, maximum=g_max_json_index, value=g_index, step=1, label="Index", scale=3)
            splitpoint_slider = gr.Slider(
                minimum=0, maximum=120.0, value=0, step=0.1, label="Audio Split Point(s)", scale=3
            )
            btn_audio_split = gr.Button("Split Audio", scale=1)
            btn_save_json = gr.Button("Save File", visible=True, scale=1)
            btn_invert_selection = gr.Button("Invert Selection", scale=1)

        with gr.Row():
            with gr.Column():
                # One text / audio / checkbox row per slot of the page.
                for _ in range(0, g_batch):
                    with gr.Row():
                        text = gr.Textbox(label="Text", visible=True, scale=5)
                        audio_output = gr.Audio(label="Output Audio", visible=True, scale=5)
                        audio_check = gr.Checkbox(label="Yes", show_label=True, info="Choose Audio", scale=1)
                        g_text_list.append(text)
                        g_audio_list.append(audio_output)
                        g_checkbox_list.append(audio_check)

        with gr.Row():
            batchsize_slider = gr.Slider(
                minimum=1, maximum=g_batch, value=g_batch, step=1, label="Batch Size", scale=3, interactive=False
            )
            interval_slider = gr.Slider(minimum=0, maximum=2, value=0, step=0.01, label="Interval", scale=3)
            btn_theme_light = gr.Button("Light Theme", link="?__theme=light", scale=1)
            btn_theme_dark = gr.Button("Dark Theme", link="?__theme=dark", scale=1)

        # Outputs shared by every handler that redraws the page.
        page_outputs = [*g_text_list, *g_audio_list, *g_checkbox_list]

        btn_change_index.click(
            b_change_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[*page_outputs],
        )

        btn_submit_change.click(
            b_submit_change,
            inputs=[
                *g_text_list,
            ],
            outputs=[index_slider, *page_outputs],
        )

        btn_previous_index.click(
            b_previous_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[index_slider, *page_outputs],
        )

        btn_next_index.click(
            b_next_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[index_slider, *page_outputs],
        )

        btn_delete_audio.click(
            b_delete_audio,
            inputs=[*g_checkbox_list],
            outputs=[index_slider, *page_outputs],
        )

        btn_merge_audio.click(
            b_merge_audio,
            inputs=[interval_slider, *g_checkbox_list],
            outputs=[index_slider, *page_outputs],
        )

        btn_audio_split.click(
            b_audio_split,
            inputs=[splitpoint_slider, *g_checkbox_list],
            outputs=[index_slider, *page_outputs],
        )

        btn_invert_selection.click(b_invert_selection, inputs=[*g_checkbox_list], outputs=[*g_checkbox_list])

        btn_save_json.click(b_save_file)

        # Populate the first page when the UI loads.
        demo.load(
            b_change_index,
            inputs=[
                index_slider,
                batchsize_slider,
            ],
            outputs=[*page_outputs],
        )

    demo.launch(
        server_name="0.0.0.0",
        inbrowser=True,
        # quiet=True,
        share=is_share,
        server_port=int(args.webui_port_subfix),
    )
|
||||
@ -1,23 +1,22 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import gradio as gr
|
||||
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from tools.my_utils import clean_path
|
||||
|
||||
i18n = I18nAuto()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import sys
|
||||
|
||||
import ffmpeg
|
||||
import gradio as gr
|
||||
import torch
|
||||
from bsroformer import Roformer_Loader
|
||||
from mdxnet import MDXNetDereverb
|
||||
from vr import AudioPre, AudioPreDeEcho
|
||||
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
from tools.my_utils import clean_path, load_cudnn
|
||||
|
||||
i18n = I18nAuto()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
weight_uvr5_root = "tools/uvr5/uvr5_weights"
|
||||
uvr5_names = []
|
||||
for name in os.listdir(weight_uvr5_root):
|
||||
@ -44,6 +43,7 @@ def html_center(text, label="p"):
|
||||
|
||||
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
|
||||
infos = []
|
||||
load_cudnn()
|
||||
try:
|
||||
inp_root = clean_path(inp_root)
|
||||
save_root_vocal = clean_path(save_root_vocal)
|
||||
|
||||
19
webui.py
19
webui.py
@ -58,6 +58,7 @@ for site_packages_root in site_packages_roots:
|
||||
traceback.print_exc()
|
||||
import shutil
|
||||
import subprocess
|
||||
from multiprocessing import cpu_count
|
||||
from subprocess import Popen
|
||||
|
||||
from tools.assets import css, js, top_html
|
||||
@ -86,14 +87,9 @@ from config import (
|
||||
from tools import my_utils
|
||||
from tools.my_utils import check_details, check_for_existance
|
||||
|
||||
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
|
||||
try:
|
||||
import gradio.analytics as analytics
|
||||
|
||||
analytics.version_check = lambda: None
|
||||
except:
|
||||
...
|
||||
import gradio as gr
|
||||
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
|
||||
os.environ["language"] = language
|
||||
i18n = I18nAuto(language=language)
|
||||
|
||||
n_cpu = cpu_count()
|
||||
|
||||
@ -276,12 +272,7 @@ def change_label(path_list):
|
||||
if p_label is None:
|
||||
check_for_existance([path_list])
|
||||
path_list = my_utils.clean_path(path_list)
|
||||
cmd = '"%s" -s tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s' % (
|
||||
python_exec,
|
||||
path_list,
|
||||
webui_port_subfix,
|
||||
is_share,
|
||||
)
|
||||
cmd = f'"{python_exec}" -s tools/subfix.py --i18n-lang {language} --port {webui_port_subfix} --share {is_share} "{path_list}"'
|
||||
yield (
|
||||
process_info(process_name_subfix, "opened"),
|
||||
{"__type__": "update", "visible": False},
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user