From 37ae8bf051c4ae43869cf799831ae19d5df1557d Mon Sep 17 00:00:00 2001 From: Yuan-Man <68322456+Yuan-ManX@users.noreply.github.com> Date: Wed, 14 Feb 2024 20:32:26 +0800 Subject: [PATCH 01/20] Update es_ES.json --- i18n/locale/es_ES.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/i18n/locale/es_ES.json b/i18n/locale/es_ES.json index 5445b698..3bcd2a38 100644 --- a/i18n/locale/es_ES.json +++ b/i18n/locale/es_ES.json @@ -8,8 +8,16 @@ "是否开启UVR5-WebUI": "¿Habilitar UVR5-WebUI?", "UVR5进程输出信息": "Información de salida del proceso UVR5", "0b-语音切分工具": "0b-Herramienta de división de voz", + ".list标注文件的路径": "Ruta del archivo de anotación .list", + "GPT模型列表": "Lista de modelos GPT", + "SoVITS模型列表": "Lista de modelos SoVITS", + "填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名(不是全路径)。": "Directorio donde se guardan los archivos de audio después del corte! Ruta completa del archivo de audio a leer = este directorio - nombre de archivo correspondiente a la forma de onda en el archivo de lista (no la ruta completa).", "音频自动切分输入路径,可文件可文件夹": "Ruta de entrada para la división automática de audio, puede ser un archivo o una carpeta", "切分后的子音频的输出根目录": "Directorio raíz de salida de los sub-audios después de la división", + "怎么切": "Cómo cortar", + "不切": "No cortar", + "凑四句一切": "Completa cuatro oraciones para rellenar todo", + "按英文句号.切": "Cortar por puntos en inglés.", "threshold:音量小于这个值视作静音的备选切割点": "umbral: puntos de corte alternativos considerados como silencio si el volumen es menor que este valor", "min_length:每段最小多长,如果第一段太短一直和后面段连起来直到超过这个值": "min_length: duración mínima de cada segmento, si el primer segmento es demasiado corto, se conecta continuamente con los siguientes hasta que supera este valor", "min_interval:最短切割间隔": "min_interval: intervalo mínimo de corte", From f49d60d6bb7fec124ff859431b048fe423b59627 Mon Sep 17 00:00:00 2001 From: Tundra Work Date: Sun, 18 Feb 2024 07:13:09 +0000 Subject: [PATCH 02/20] fix: 1A-Dataset formatting doesn't work if using a empty 'Audio dataset folder' --- GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index b8355dd4..9e137a9f 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -98,7 +98,7 @@ for line in lines[int(i_part)::int(all_parts)]: try: # wav_name,text=line.split("\t") wav_name, spk_name, language, text = line.split("|") - if (inp_wav_dir !=None): + if (inp_wav_dir != ""): wav_name = os.path.basename(wav_name) wav_path = "%s/%s"%(inp_wav_dir, wav_name) From 26e6fe6b15e9259c0f4b77b5435c5fe5efbbff43 Mon Sep 17 00:00:00 2001 From: yukannoshonen <151692166+idkdik2@users.noreply.github.com> Date: Sun, 18 Feb 2024 14:50:20 -0300 Subject: [PATCH 03/20] Add inference-only --- GPT_SoVITS_Inference.ipynb | 152 +++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 GPT_SoVITS_Inference.ipynb diff --git a/GPT_SoVITS_Inference.ipynb b/GPT_SoVITS_Inference.ipynb new file mode 100644 index 00000000..a5b55325 --- /dev/null +++ b/GPT_SoVITS_Inference.ipynb @@ -0,0 +1,152 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)" + ], + "metadata": { + "id": "himHYZmra7ix" + } + }, + { + "cell_type": "code", + "metadata": { + "id": "e9b7iFV3dm1f" + }, + "source": [ + "!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n", + "%cd GPT-SoVITS\n", + "!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n", + "!pip install -r requirements.txt" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Download pretrained models 下载预训练模型\n", + "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n", + "!mkdir -p /content/GPT-SoVITS/tools/damo_asr/models\n", + "!mkdir -p /content/GPT-SoVITS/tools/uvr5\n", + "%cd /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n", + "!git clone https://huggingface.co/lj1995/GPT-SoVITS\n", + "%cd /content/GPT-SoVITS/tools/damo_asr/models\n", + "!git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git\n", + "!git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git\n", + "!git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git\n", + "# @title UVR5 pretrains 安装uvr5模型\n", + "%cd /content/GPT-SoVITS/tools/uvr5\n", + "!git clone https://huggingface.co/Delik/uvr5_weights\n", + "!git config core.sparseCheckout true\n", + "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/" + ], + "metadata": { + "id": "0NgxXg5sjv7z", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Create folder models 创建文件夹模型\n", + "import os\n", + "base_directory = \"/content/GPT-SoVITS\"\n", + "folder_names = [\"SoVITS_weights\", \"GPT_weights\"]\n", + "\n", + "for folder_name in folder_names:\n", + " if os.path.exists(os.path.join(base_directory, folder_name)):\n", + " print(f\"The folder '{folder_name}' already exists. (文件夹'{folder_name}'已经存在。)\")\n", + " else:\n", + " os.makedirs(os.path.join(base_directory, folder_name))\n", + " print(f\"The folder '{folder_name}' was created successfully! (文件夹'{folder_name}'已成功创建!)\")\n", + "\n", + "print(\"All folders have been created. (所有文件夹均已创建。)\")" + ], + "metadata": { + "cellView": "form", + "id": "cPDEH-9czOJF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "import zipfile\n", + "import shutil\n", + "import os\n", + "\n", + "#@title Import model 导入模型 (HuggingFace)\n", + "hf_link = 'https://huggingface.co/modelloosrvcc/Nagisa_Shingetsu_GPT-SoVITS/resolve/main/Nagisa.zip' #@param {type: \"string\"}\n", + "\n", + "output_path = '/content/'\n", + "\n", + "response = requests.get(hf_link)\n", + "with open(output_path + 'file.zip', 'wb') as file:\n", + " file.write(response.content)\n", + "\n", + "with zipfile.ZipFile(output_path + 'file.zip', 'r') as zip_ref:\n", + " zip_ref.extractall(output_path)\n", + "\n", + "os.remove(output_path + \"file.zip\")\n", + "\n", + "source_directory = output_path\n", + "SoVITS_destination_directory = '/content/GPT-SoVITS/SoVITS_weights'\n", + "GPT_destination_directory = '/content/GPT-SoVITS/GPT_weights'\n", + "\n", + "for filename in os.listdir(source_directory):\n", + " if filename.endswith(\".pth\"):\n", + " source_path = os.path.join(source_directory, filename)\n", + " destination_path = os.path.join(SoVITS_destination_directory, filename)\n", + " shutil.move(source_path, destination_path)\n", + "\n", + "for filename in os.listdir(source_directory):\n", + " if filename.endswith(\".ckpt\"):\n", + " source_path = os.path.join(source_directory, filename)\n", + " destination_path = os.path.join(GPT_destination_directory, filename)\n", + " shutil.move(source_path, destination_path)\n", + "\n", + "print(f'Model downloaded. (模型已下载。)')" + ], + "metadata": { + "cellView": "form", + "id": "vbZY-LnM0tzq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title launch WebUI 启动WebUI\n", + "!/usr/local/bin/pip install ipykernel\n", + "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n", + "%cd /content/GPT-SoVITS/\n", + "!/usr/local/bin/python webui.py" + ], + "metadata": { + "id": "4oRGUzkrk8C7", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From d36c7fa1dffa609902935f3aba6a12d462c8c95f Mon Sep 17 00:00:00 2001 From: Lion Date: Mon, 19 Feb 2024 19:28:32 +0800 Subject: [PATCH 04/20] Update README --- README.md | 2 +- docs/cn/README.md | 2 +- docs/ja/README.md | 2 +- docs/ko/README.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 72f3694f..4266970f 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ For Chinese ASR (additionally), download models from [Damo ASR Model](https://mo If you are a Mac user, make sure you meet the following conditions for training and inferencing with GPU: -- Mac computers with Apple silicon or AMD GPUs +- Mac computers with Apple silicon - macOS 12.3 or later - Xcode command-line tools installed by running `xcode-select --install` diff --git a/docs/cn/README.md b/docs/cn/README.md index 8d3ca49a..4e47e21b 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -49,7 +49,7 @@ _注意: numba==0.56.4 需要 python<3.11_ 如果你是 Mac 用户,请先确保满足以下条件以使用 GPU 进行训练和推理: -- 搭载 Apple 芯片或 AMD GPU 的 Mac +- 搭载 Apple 芯片 的 Mac - macOS 12.3 或更高版本 - 已通过运行`xcode-select --install`安装 Xcode command-line tools diff --git a/docs/ja/README.md b/docs/ja/README.md index aa300c86..b27fd652 100644 --- a/docs/ja/README.md +++ b/docs/ja/README.md @@ -47,7 +47,7 @@ _注記: numba==0.56.4 は py<3.11 が必要です_ 如果あなたが Mac ユーザーである場合、GPU を使用してトレーニングおよび推論を行うために以下の条件を満たしていることを確認してください: -- Apple シリコンまたは AMD GPU を搭載した Mac コンピューター +- Apple シリコンを搭載した Mac コンピューター - macOS 12.3 以降 - `xcode-select --install`を実行してインストールされた Xcode コマンドラインツール diff --git a/docs/ko/README.md b/docs/ko/README.md index afcdd667..c57cf5cb 100644 --- a/docs/ko/README.md +++ b/docs/ko/README.md @@ -49,7 +49,7 @@ _참고: numba==0.56.4 는 python<3.11 을 필요로 합니다._ MacOS 사용자는 GPU를 사용하여 훈련 및 추론을 하려면 다음 조건을 충족해야 합니다: -- Apple 칩 또는 AMD GPU가 장착된 Mac +- Apple 칩이 장착된 Mac - macOS 12.3 이상 - `xcode-select --install`을 실행하여 Xcode command-line tools를 설치했습니다. From 555c52b0aa93b08ba05621b349e6e61f243f1b3d Mon Sep 17 00:00:00 2001 From: Lion Date: Mon, 19 Feb 2024 19:29:57 +0800 Subject: [PATCH 05/20] Update README --- docs/cn/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cn/README.md b/docs/cn/README.md index 4e47e21b..a0cfd0a0 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -49,7 +49,7 @@ _注意: numba==0.56.4 需要 python<3.11_ 如果你是 Mac 用户,请先确保满足以下条件以使用 GPU 进行训练和推理: -- 搭载 Apple 芯片 的 Mac +- 搭载 Apple 芯片的 Mac - macOS 12.3 或更高版本 - 已通过运行`xcode-select --install`安装 Xcode command-line tools From bbef82fa86cb99ba08c6c71d8144e51689b7bc7e Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Tue, 20 Feb 2024 22:41:39 +0800 Subject: [PATCH 06/20] Refactoring get phones and bert --- GPT_SoVITS/inference_webui.py | 167 +++++++++++----------------------- GPT_SoVITS/text/chinese.py | 2 +- requirements.txt | 2 +- 3 files changed, 55 insertions(+), 116 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 407437f4..70519dab 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -209,54 +209,8 @@ dict_language = { } -def splite_en_inf(sentence, language): - pattern = re.compile(r'[a-zA-Z ]+') - textlist = [] - langlist = [] - pos = 0 - for match in pattern.finditer(sentence): - start, end = match.span() - if start > pos: - textlist.append(sentence[pos:start]) - langlist.append(language) - textlist.append(sentence[start:end]) - langlist.append("en") - pos = end - if pos < len(sentence): - textlist.append(sentence[pos:]) - langlist.append(language) - # Merge punctuation into previous word - for i in range(len(textlist)-1, 0, -1): - if re.match(r'^[\W_]+$', textlist[i]): - textlist[i-1] += textlist[i] - del textlist[i] - del langlist[i] - # Merge consecutive words with the same language tag - i = 0 - while i < len(langlist) - 1: - if langlist[i] == langlist[i+1]: - textlist[i] += textlist[i+1] - del textlist[i+1] - del langlist[i+1] - else: - i += 1 - - return textlist, langlist - - def clean_text_inf(text, language): - formattext = "" - language = language.replace("all_","") - for tmp in LangSegment.getTexts(text): - if language == "ja": - if tmp["lang"] == language or tmp["lang"] == "zh": - formattext += tmp["text"] + " " - continue - if tmp["lang"] == language: - formattext += tmp["text"] + " " - while " " in formattext: - formattext = formattext.replace(" ", " ") - phones, word2ph, norm_text = clean_text(formattext, language) + phones, word2ph, norm_text = clean_text(text, language) phones = cleaned_text_to_sequence(phones) return phones, word2ph, norm_text @@ -274,55 +228,6 @@ def get_bert_inf(phones, word2ph, norm_text, language): return bert -def nonen_clean_text_inf(text, language): - if(language!="auto"): - textlist, langlist = splite_en_inf(text, language) - else: - textlist=[] - langlist=[] - for tmp in LangSegment.getTexts(text): - langlist.append(tmp["lang"]) - textlist.append(tmp["text"]) - phones_list = [] - word2ph_list = [] - norm_text_list = [] - for i in range(len(textlist)): - lang = langlist[i] - phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) - phones_list.append(phones) - if lang == "zh": - word2ph_list.append(word2ph) - norm_text_list.append(norm_text) - print(word2ph_list) - phones = sum(phones_list, []) - word2ph = sum(word2ph_list, []) - norm_text = ' '.join(norm_text_list) - - return phones, word2ph, norm_text - - -def nonen_get_bert_inf(text, language): - if(language!="auto"): - textlist, langlist = splite_en_inf(text, language) - else: - textlist=[] - langlist=[] - for tmp in LangSegment.getTexts(text): - langlist.append(tmp["lang"]) - textlist.append(tmp["text"]) - print(textlist) - print(langlist) - bert_list = [] - for i in range(len(textlist)): - lang = langlist[i] - phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) - bert = get_bert_inf(phones, word2ph, norm_text, lang) - bert_list.append(bert) - bert = torch.cat(bert_list, dim=1) - - return bert - - splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", } @@ -332,23 +237,59 @@ def get_first(text): return text -def get_cleaned_text_final(text,language): +def get_phones_and_bert(text,language): if language in {"en","all_zh","all_ja"}: - phones, word2ph, norm_text = clean_text_inf(text, language) + language = language.replace("all_","") + if language == "en": + LangSegment.setfilters(["en"]) + formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text)) + else: + # 因无法区别中日文汉字,以用户输入为准 + formattext = text + while " " in formattext: + formattext = formattext.replace(" ", " ") + phones, word2ph, norm_text = clean_text_inf(formattext, language) + if language == "zh": + bert = get_bert_feature(norm_text, word2ph).to(device) + else: + bert = torch.zeros( + (1024, len(phones)), + dtype=torch.float16 if is_half == True else torch.float32, + ).to(device) elif language in {"zh", "ja","auto"}: - phones, word2ph, norm_text = nonen_clean_text_inf(text, language) - return phones, word2ph, norm_text + textlist=[] + langlist=[] + LangSegment.setfilters(["zh","ja","en"]) + if language == "auto": + for tmp in LangSegment.getTexts(text): + langlist.append(tmp["lang"]) + textlist.append(tmp["text"]) + else: + for tmp in LangSegment.getTexts(text): + if tmp["lang"] == "en": + langlist.append(tmp["lang"]) + else: + # 因无法区别中日文汉字,以用户输入为准 + langlist.append(language) + textlist.append(tmp["text"]) + print(textlist) + print(langlist) + phones_list = [] + bert_list = [] + norm_text_list = [] + for i in range(len(textlist)): + lang = langlist[i] + phones, word2ph, norm_text = clean_text_inf(textlist[i], lang) + bert = get_bert_inf(phones, word2ph, norm_text, lang) + phones_list.append(phones) + norm_text_list.append(norm_text) + bert_list.append(bert) + bert = torch.cat(bert_list, dim=1) + phones = sum(phones_list, []) + norm_text = ' '.join(norm_text_list) + + return phones,bert.to(dtype),norm_text -def get_bert_final(phones, word2ph, text,language,device): - if language == "en": - bert = get_bert_inf(phones, word2ph, text, language) - elif language in {"zh", "ja","auto"}: - bert = nonen_get_bert_inf(text, language) - elif language == "all_zh": - bert = get_bert_feature(text, word2ph).to(device) - else: - bert = torch.zeros((1024, len(phones))).to(device) - return bert def merge_short_text_in_array(texts, threshold): if (len(texts)) < 2: @@ -425,8 +366,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, texts = merge_short_text_in_array(texts, 5) audio_opt = [] if not ref_free: - phones1, word2ph1, norm_text1=get_cleaned_text_final(prompt_text, prompt_language) - bert1=get_bert_final(phones1, word2ph1, norm_text1,prompt_language,device).to(dtype) + phones1,bert1,norm_text1=get_phones_and_bert(prompt_text, prompt_language) for text in texts: # 解决输入目标文本的空行导致报错的问题 @@ -434,8 +374,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, continue if (text[-1] not in splits): text += "。" if text_language != "en" else "." print(i18n("实际输入的目标文本(每句):"), text) - phones2, word2ph2, norm_text2 = get_cleaned_text_final(text, text_language) - bert2 = get_bert_final(phones2, word2ph2, norm_text2, text_language, device).to(dtype) + phones2,bert2,norm_text2=get_phones_and_bert(text, text_language) if not ref_free: bert = torch.cat([bert1, bert2], 1) all_phoneme_ids = torch.LongTensor(phones1+phones2).to(device).unsqueeze(0) diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py index 5334326e..ea41db1f 100644 --- a/GPT_SoVITS/text/chinese.py +++ b/GPT_SoVITS/text/chinese.py @@ -30,7 +30,7 @@ rep_map = { "\n": ".", "·": ",", "、": ",", - # "...": "…", + "...": "…", "$": ".", "/": ",", "—": "-", diff --git a/requirements.txt b/requirements.txt index fae6198d..75bd945d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,5 +23,5 @@ PyYAML psutil jieba_fast jieba -LangSegment +LangSegment>=0.2.0 Faster_Whisper \ No newline at end of file From 76570cff52ff81e90b6b5f98e80aa657afc70738 Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Tue, 20 Feb 2024 22:45:49 +0800 Subject: [PATCH 07/20] Del a-zA-Z --- GPT_SoVITS/inference_webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 70519dab..c427b25f 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -245,7 +245,7 @@ def get_phones_and_bert(text,language): formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text)) else: # 因无法区别中日文汉字,以用户输入为准 - formattext = text + formattext = re.sub('[a-zA-Z]', '', text) while " " in formattext: formattext = formattext.replace(" ", " ") phones, word2ph, norm_text = clean_text_inf(formattext, language) From 31802947108cb12d708404fb621f287fd5d13716 Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:57:58 +0000 Subject: [PATCH 08/20] Update config.py Change the inference device for Mac to accelerate inference and reduce memory leak --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 3e9e951c..caaadd47 100644 --- a/config.py +++ b/config.py @@ -20,7 +20,7 @@ python_exec = sys.executable or "python" if torch.cuda.is_available(): infer_device = "cuda" elif torch.backends.mps.is_available(): - infer_device = "mps" + infer_device = "cpu" else: infer_device = "cpu" From 861658050b6eab32ce6a34cfee37fc63a53a4ae7 Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:03:08 +0000 Subject: [PATCH 09/20] Update inference_webui.py Change inference device to accelerate inference on Mac and reduce memory leak --- GPT_SoVITS/inference_webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index c427b25f..a046776d 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -73,7 +73,7 @@ os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时 if torch.cuda.is_available(): device = "cuda" elif torch.backends.mps.is_available(): - device = "mps" + device = "cpu" else: device = "cpu" From 84062074a311d10da5998f10b5f3d36dc8467b5f Mon Sep 17 00:00:00 2001 From: KamioRinn Date: Wed, 21 Feb 2024 01:14:09 +0800 Subject: [PATCH 10/20] Adjust text normlization --- GPT_SoVITS/inference_webui.py | 5 +++-- GPT_SoVITS/text/chinese.py | 2 ++ GPT_SoVITS/text/zh_normalization/num.py | 15 +++++++++++++++ .../text/zh_normalization/text_normlization.py | 10 +++++++--- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index c427b25f..695121ac 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -245,7 +245,7 @@ def get_phones_and_bert(text,language): formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text)) else: # 因无法区别中日文汉字,以用户输入为准 - formattext = re.sub('[a-zA-Z]', '', text) + formattext = text while " " in formattext: formattext = formattext.replace(" ", " ") phones, word2ph, norm_text = clean_text_inf(formattext, language) @@ -286,7 +286,7 @@ def get_phones_and_bert(text,language): bert_list.append(bert) bert = torch.cat(bert_list, dim=1) phones = sum(phones_list, []) - norm_text = ' '.join(norm_text_list) + norm_text = ''.join(norm_text_list) return phones,bert.to(dtype),norm_text @@ -375,6 +375,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, if (text[-1] not in splits): text += "。" if text_language != "en" else "." print(i18n("实际输入的目标文本(每句):"), text) phones2,bert2,norm_text2=get_phones_and_bert(text, text_language) + print(i18n("前端处理后的文本(每句):"), norm_text2) if not ref_free: bert = torch.cat([bert1, bert2], 1) all_phoneme_ids = torch.LongTensor(phones1+phones2).to(device).unsqueeze(0) diff --git a/GPT_SoVITS/text/chinese.py b/GPT_SoVITS/text/chinese.py index ea41db1f..f9a4b360 100644 --- a/GPT_SoVITS/text/chinese.py +++ b/GPT_SoVITS/text/chinese.py @@ -34,6 +34,8 @@ rep_map = { "$": ".", "/": ",", "—": "-", + "~": "…", + "~":"…", } tone_modifier = ToneSandhi() diff --git a/GPT_SoVITS/text/zh_normalization/num.py b/GPT_SoVITS/text/zh_normalization/num.py index 8a54d3e6..8ef7f48f 100644 --- a/GPT_SoVITS/text/zh_normalization/num.py +++ b/GPT_SoVITS/text/zh_normalization/num.py @@ -172,6 +172,21 @@ def replace_range(match) -> str: return result +# ~至表达式 +RE_TO_RANGE = re.compile( + r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)[~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)') + +def replace_to_range(match) -> str: + """ + Args: + match (re.Match) + Returns: + str + """ + result = match.group(0).replace('~', '至') + return result + + def _get_value(value_string: str, use_zero: bool=True) -> List[str]: stripped = value_string.lstrip('0') if len(stripped) == 0: diff --git a/GPT_SoVITS/text/zh_normalization/text_normlization.py b/GPT_SoVITS/text/zh_normalization/text_normlization.py index 1250e96c..712537d5 100644 --- a/GPT_SoVITS/text/zh_normalization/text_normlization.py +++ b/GPT_SoVITS/text/zh_normalization/text_normlization.py @@ -33,6 +33,7 @@ from .num import RE_NUMBER from .num import RE_PERCENTAGE from .num import RE_POSITIVE_QUANTIFIERS from .num import RE_RANGE +from .num import RE_TO_RANGE from .num import replace_default_num from .num import replace_frac from .num import replace_negative_num @@ -40,6 +41,7 @@ from .num import replace_number from .num import replace_percentage from .num import replace_positive_quantifier from .num import replace_range +from .num import replace_to_range from .phonecode import RE_MOBILE_PHONE from .phonecode import RE_NATIONAL_UNIFORM_NUMBER from .phonecode import RE_TELEPHONE @@ -65,7 +67,7 @@ class TextNormalizer(): if lang == "zh": text = text.replace(" ", "") # 过滤掉特殊字符 - text = re.sub(r'[——《》【】<=>{}()()#&@“”^_|…\\]', '', text) + text = re.sub(r'[——《》【】<=>{}()()#&@“”^_|\\]', '', text) text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) text = text.strip() sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] @@ -73,8 +75,8 @@ class TextNormalizer(): def _post_replace(self, sentence: str) -> str: sentence = sentence.replace('/', '每') - sentence = sentence.replace('~', '至') - sentence = sentence.replace('~', '至') + # sentence = sentence.replace('~', '至') + # sentence = sentence.replace('~', '至') sentence = sentence.replace('①', '一') sentence = sentence.replace('②', '二') sentence = sentence.replace('③', '三') @@ -128,6 +130,8 @@ class TextNormalizer(): sentence = RE_TIME_RANGE.sub(replace_time, sentence) sentence = RE_TIME.sub(replace_time, sentence) + # 处理~波浪号作为至的替换 + sentence = RE_TO_RANGE.sub(replace_to_range, sentence) sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) sentence = replace_measure(sentence) sentence = RE_FRAC.sub(replace_frac, sentence) From 220367f90c85f6dc20751c4a586320c463b28406 Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:15:11 +0000 Subject: [PATCH 11/20] Update inference_webui.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 精简代码 --- GPT_SoVITS/inference_webui.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index a046776d..3a4bfb3e 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -72,8 +72,6 @@ os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时 if torch.cuda.is_available(): device = "cuda" -elif torch.backends.mps.is_available(): - device = "cpu" else: device = "cpu" From db40317d9ceaf782b5ccb383e044281a0489f29a Mon Sep 17 00:00:00 2001 From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com> Date: Wed, 21 Feb 2024 01:15:31 +0000 Subject: [PATCH 12/20] Update config.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 精简代码 --- config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/config.py b/config.py index caaadd47..1f741285 100644 --- a/config.py +++ b/config.py @@ -19,8 +19,6 @@ exp_root = "logs" python_exec = sys.executable or "python" if torch.cuda.is_available(): infer_device = "cuda" -elif torch.backends.mps.is_available(): - infer_device = "cpu" else: infer_device = "cpu" From b0b039ad2154d9867ae77bd484367fc6a8d1d2c7 Mon Sep 17 00:00:00 2001 From: Kenn Zhang Date: Sat, 17 Feb 2024 09:57:18 +0000 Subject: [PATCH 13/20] =?UTF-8?q?Docker=E9=95=9C=E5=83=8F=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E8=84=9A=E6=9C=AC=E5=AF=B9=E4=BA=8E=E9=95=9C=E5=83=8F?= =?UTF-8?q?=E7=9A=84Tag=E5=A2=9E=E5=8A=A0Git=20Commit=E7=9A=84Hash?= =?UTF-8?q?=E5=80=BC=EF=BC=8C=E4=BE=BF=E4=BA=8E=E7=9F=A5=E9=81=93=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E4=B8=AD=E5=BA=94=E7=94=A8=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dockerignore | 4 +++- dockerbuild.sh | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index dc39f76f..4eca27be 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,4 +3,6 @@ logs output reference SoVITS_weights -.git \ No newline at end of file +GPT_weights +TEMP +.git diff --git a/dockerbuild.sh b/dockerbuild.sh index 1b3dcee5..3a4a1e18 100755 --- a/dockerbuild.sh +++ b/dockerbuild.sh @@ -2,13 +2,20 @@ # 获取当前日期,格式为 YYYYMMDD DATE=$(date +%Y%m%d) +# 获取最新的 Git commit 哈希值的前 7 位 +COMMIT_HASH=$(git rev-parse HEAD | cut -c 1-7) # 构建 full 版本的镜像 docker build --build-arg IMAGE_TYPE=full -t breakstring/gpt-sovits:latest . # 为同一个镜像添加带日期的标签 docker tag breakstring/gpt-sovits:latest breakstring/gpt-sovits:dev-$DATE +# 为同一个镜像添加带当前代码库Commit哈希值的标签 +docker tag breakstring/gpt-sovits:latest breakstring/gpt-sovits:dev-$COMMIT_HASH -# 构建 elite 版本的镜像 + +# 构建 elite 版本的镜像(无模型下载步骤,需手工将模型下载安装进容器) docker build --build-arg IMAGE_TYPE=elite -t breakstring/gpt-sovits:latest-elite . # 为同一个镜像添加带日期的标签 docker tag breakstring/gpt-sovits:latest-elite breakstring/gpt-sovits:dev-$DATE-elite +# 为同一个镜像添加带当前代码库Commit哈希值的标签 +docker tag breakstring/gpt-sovits:latest-elite breakstring/gpt-sovits:dev-$COMMIT_HASH-elite From 4b0fae83020389eed0dfd283c5122e5f3df584fc Mon Sep 17 00:00:00 2001 From: JavaAndPython55 <34533090+JavaAndPython55@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:11:59 +0800 Subject: [PATCH 14/20] =?UTF-8?q?=E6=96=B0=E5=A2=9Eapi.py=E4=B8=AD?= =?UTF-8?q?=EF=BC=9A=E5=8F=AF=E5=9C=A8=E5=90=AF=E5=8A=A8=E5=90=8E=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E4=BF=AE=E6=94=B9=E6=A8=A1=E5=9E=8B=EF=BC=8C=E4=BB=A5?= =?UTF-8?q?=E6=AD=A4=E6=BB=A1=E8=B6=B3=E5=90=8C=E4=B8=80=E4=B8=AAapi?= =?UTF-8?q?=E4=B8=8D=E5=90=8C=E7=9A=84=E6=9C=97=E8=AF=BB=E8=80=85=E8=AF=B7?= =?UTF-8?q?=E6=B1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 可在启动后动态修改模型,以此满足同一个api不同的朗读者请求 --- api.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/api.py b/api.py index b8d584e7..754f0769 100644 --- a/api.py +++ b/api.py @@ -144,7 +144,7 @@ parser.add_argument("-dt", "--default_refer_text", type=str, default="", help=" parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种") parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu / mps") -parser.add_argument("-a", "--bind_addr", type=str, default="127.0.0.1", help="default: 127.0.0.1") +parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0") parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880") parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度") parser.add_argument("-hp", "--half_precision", action="store_true", default=False, help="覆盖config.is_half为True, 使用半精度") @@ -227,6 +227,44 @@ def is_full(*items): # 任意一项为空返回False return False return True +def change_sovits_weights(sovits_path): + global vq_model, hps + dict_s2 = torch.load(sovits_path, map_location="cpu") + hps = dict_s2["config"] + hps = DictToAttrRecursive(hps) + hps.model.semantic_frame_rate = "25hz" + vq_model = SynthesizerTrn( + hps.data.filter_length // 2 + 1, + hps.train.segment_size // hps.data.hop_length, + n_speakers=hps.data.n_speakers, + **hps.model + ) + if ("pretrained" not in sovits_path): + del vq_model.enc_q + if is_half == True: + vq_model = vq_model.half().to(device) + else: + vq_model = vq_model.to(device) + vq_model.eval() + print(vq_model.load_state_dict(dict_s2["weight"], strict=False)) + with open("./sweight.txt", "w", encoding="utf-8") as f: + f.write(sovits_path) +def change_gpt_weights(gpt_path): + global hz, max_sec, t2s_model, config + hz = 50 + dict_s1 = torch.load(gpt_path, map_location="cpu") + config = dict_s1["config"] + max_sec = config["data"]["max_sec"] + t2s_model = Text2SemanticLightningModule(config, "****", is_train=False) + t2s_model.load_state_dict(dict_s1["weight"]) + if is_half == True: + t2s_model = t2s_model.half() + t2s_model = t2s_model.to(device) + t2s_model.eval() + total = sum([param.nelement() for param in t2s_model.parameters()]) + print("Number of parameter: %.2fM" % (total / 1e6)) + with open("./gweight.txt", "w", encoding="utf-8") as f: f.write(gpt_path) + def get_bert_feature(text, word2ph): with torch.no_grad(): @@ -452,6 +490,20 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language): app = FastAPI() +#clark新增-----2024-02-21 +#可在启动后动态修改模型,以此满足同一个api不同的朗读者请求 +@app.post("/set_model") +async def set_model(request: Request): + json_post_raw = await request.json() + global gpt_path + gpt_path=json_post_raw.get("gpt_model_path") + global sovits_path + sovits_path=json_post_raw.get("sovits_model_path") + print("gptpath"+gpt_path+";vitspath"+sovits_path) + change_sovits_weights(sovits_path) + change_gpt_weights(gpt_path) + return "ok" +# 新增-----end------ @app.post("/control") async def control(request: Request): From 6da486c15d09e3d99fa42c5e560aaac56b6b4ce1 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:27:59 +0800 Subject: [PATCH 15/20] Add files via upload --- webui.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/webui.py b/webui.py index cff7cdb2..c6430d92 100644 --- a/webui.py +++ b/webui.py @@ -117,6 +117,7 @@ def change_choices(): p_label=None p_uvr5=None p_asr=None +p_denoise=None p_tts_inference=None def kill_proc_tree(pid, including_parent=True): @@ -220,6 +221,29 @@ def close_asr(): kill_process(p_asr.pid) p_asr=None return "已终止ASR进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False} +def open_denoise(denoise_inp_dir, denoise_opt_dir): + global p_denoise + if(p_denoise==None): + denoise_inp_dir=my_utils.clean_path(denoise_inp_dir) + denoise_opt_dir=my_utils.clean_path(denoise_opt_dir) + cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s'%(python_exec,denoise_inp_dir,denoise_opt_dir,"float16"if is_half==True else "float32") + + yield "语音降噪任务开启:%s"%cmd,{"__type__":"update","visible":False},{"__type__":"update","visible":True} + print(cmd) + p_denoise = Popen(cmd, shell=True) + p_denoise.wait() + p_denoise=None + yield f"语音降噪任务完成, 查看终端进行下一步",{"__type__":"update","visible":True},{"__type__":"update","visible":False} + else: + yield "已有正在进行的语音降噪任务,需先终止才能开启下一次任务",{"__type__":"update","visible":False},{"__type__":"update","visible":True} + # return None + +def close_denoise(): + global p_denoise + if(p_denoise!=None): + kill_process(p_denoise.pid) + p_denoise=None + return "已终止语音降噪进程",{"__type__":"update","visible":True},{"__type__":"update","visible":False} p_train_SoVITS=None def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D): @@ -678,6 +702,13 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True) n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True) slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息")) + gr.Markdown(value=i18n("0bb-语音降噪工具")) + with gr.Row(): + open_denoise_button = gr.Button(i18n("开启语音降噪"), variant="primary",visible=True) + close_denoise_button = gr.Button(i18n("终止语音降噪进程"), variant="primary",visible=False) + denoise_input_dir=gr.Textbox(label=i18n("降噪音频文件输入文件夹"),value="") + denoise_output_dir=gr.Textbox(label=i18n("降噪结果输出文件夹"),value="output/denoise_opt") + denoise_info = gr.Textbox(label=i18n("语音降噪进程输出信息")) gr.Markdown(value=i18n("0c-中文批量离线ASR工具")) with gr.Row(): open_asr_button = gr.Button(i18n("开启离线批量ASR"), variant="primary",visible=True) @@ -740,6 +771,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button]) open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button]) close_slicer_button.click(close_slice, [], [slicer_info,open_slicer_button,close_slicer_button]) + open_denoise_button.click(open_denoise, [denoise_input_dir,denoise_output_dir], [denoise_info,open_denoise_button,close_denoise_button]) + close_denoise_button.click(close_denoise, [], [denoise_info,open_denoise_button,close_denoise_button]) + with gr.TabItem(i18n("1-GPT-SoVITS-TTS")): with gr.Row(): exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True) From 5a17177342d2df1e11369f2f4f58d34a3feb1a35 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:28:22 +0800 Subject: [PATCH 16/20] Add files via upload --- tools/cmd-denoise.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tools/cmd-denoise.py diff --git a/tools/cmd-denoise.py b/tools/cmd-denoise.py new file mode 100644 index 00000000..69b51e66 --- /dev/null +++ b/tools/cmd-denoise.py @@ -0,0 +1,29 @@ +import os,argparse + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from tqdm import tqdm + +path_denoise = 'tools/denoise-model/speech_frcrn_ans_cirm_16k' +path_denoise = path_denoise if os.path.exists(path_denoise) else "damo/speech_frcrn_ans_cirm_16k" +ans = pipeline(Tasks.acoustic_noise_suppression,model=path_denoise) +def execute_denoise(input_folder,output_folder): + os.makedirs(output_folder,exist_ok=True) + # print(input_folder) + # print(list(os.listdir(input_folder).sort())) + for name in tqdm(os.listdir(input_folder)): + ans("%s/%s"%(input_folder,name),output_path='%s/%s'%(output_folder,name)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input_folder", type=str, required=True, + help="Path to the folder containing WAV files.") + parser.add_argument("-o", "--output_folder", type=str, required=True, + help="Output folder to store transcriptions.") + parser.add_argument("-p", "--precision", type=str, default='float16', choices=['float16','float32'], + help="fp16 or fp32")#还没接入 + cmd = parser.parse_args() + execute_denoise( + input_folder = cmd.input_folder, + output_folder = cmd.output_folder, + ) \ No newline at end of file From 82085e48869fbe6f817e83a7e858309ca2f06bd6 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:29:14 +0800 Subject: [PATCH 17/20] Create .gitignore --- tools/denoise-model/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tools/denoise-model/.gitignore diff --git a/tools/denoise-model/.gitignore b/tools/denoise-model/.gitignore new file mode 100644 index 00000000..d6b7ef32 --- /dev/null +++ b/tools/denoise-model/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From 788ea251dafa9aff6de7ca019d1870443f08f445 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:33:13 +0800 Subject: [PATCH 18/20] Update Changelog_CN.md --- docs/cn/Changelog_CN.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/cn/Changelog_CN.md b/docs/cn/Changelog_CN.md index 6622146a..8afd3514 100644 --- a/docs/cn/Changelog_CN.md +++ b/docs/cn/Changelog_CN.md @@ -125,6 +125,16 @@ 2-修复中文文本前端bug https://github.com/RVC-Boss/GPT-SoVITS/issues/475 +### 20240221更新 + +1-数据处理添加语音降噪选项 + +2-中文日文前端处理优化 https://github.com/RVC-Boss/GPT-SoVITS/pull/559 https://github.com/RVC-Boss/GPT-SoVITS/pull/556 https://github.com/RVC-Boss/GPT-SoVITS/pull/532 https://github.com/RVC-Boss/GPT-SoVITS/pull/507 https://github.com/RVC-Boss/GPT-SoVITS/pull/509 + +3-mac CPU推理更快因此把推理设备从mps改到CPU + +4-colab修复不开启公网url + todolist: 1-中文多音字推理优化 From 8b4f0dfe43ed92606e5ff4fd95040abb8bba541b Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:37:19 +0800 Subject: [PATCH 19/20] Update 2-get-hubert-wav32k.py --- GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py index 76a7ec99..7607259e 100644 --- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py +++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py @@ -99,7 +99,7 @@ for line in lines[int(i_part)::int(all_parts)]: try: # wav_name,text=line.split("\t") wav_name, spk_name, language, text = line.split("|") - if (inp_wav_dir != ""): + if (inp_wav_dir != "" and inp_wav_dir != None): wav_name = os.path.basename(wav_name) wav_path = "%s/%s"%(inp_wav_dir, wav_name) From 939971afe3770c530b0bc0f9a1d5824a1786411d Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Wed, 21 Feb 2024 18:52:07 +0800 Subject: [PATCH 20/20] Add files via upload --- GPT_SoVITS/inference_webui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 2247bc74..d2f3f949 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -561,12 +561,12 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app: inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath") with gr.Column(): ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True) - gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT")) + gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。")) prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="") prompt_language = gr.Dropdown( label=i18n("参考音频的语种"), choices=[i18n("中文"), i18n("英文"), i18n("日文"), i18n("中英混合"), i18n("日英混合"), i18n("多语种混合")], value=i18n("中文") ) - gr.Markdown(value=i18n("*请填写需要合成的目标文本。中英混合选中文,日英混合选日文,中日混合暂不支持,非目标语言文本自动遗弃。")) + gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式")) with gr.Row(): text = gr.Textbox(label=i18n("需要合成的文本"), value="") text_language = gr.Dropdown(