Fix dependency-related issues via requirements update (#2236)

* Update requirements.txt

* Create constraints.txt

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* pyopenjtalk and onnx fix

* Update requirements.txt

* Update requirements.txt

* Update install.sh

* update shell install.sh

* update docs

* Update Install.sh

* fix bugs

* Update .gitignore

* Update .gitignore

* Update install.sh

* Update install.sh

* Update extra-req.txt

* Update requirements.txt
This commit is contained in:
XXXXRT666 2025-03-31 04:27:12 +01:00 committed by GitHub
parent ee4a466f79
commit 6c468583c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 527 additions and 200 deletions

178
.gitignore vendored
View File

@ -18,5 +18,183 @@ TEMP
weight.json weight.json
ffmpeg* ffmpeg*
ffprobe* ffprobe*
cfg.json
speakers.json
ref_audios
tools/AP_BWE_main/24kto48k/* tools/AP_BWE_main/24kto48k/*
!tools/AP_BWE_main/24kto48k/readme.txt !tools/AP_BWE_main/24kto48k/readme.txt
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

View File

@ -1,42 +1,37 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)"
],
"metadata": { "metadata": {
"id": "himHYZmra7ix" "id": "himHYZmra7ix"
} },
"source": [
"# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": { "metadata": {
"id": "e9b7iFV3dm1f" "id": "e9b7iFV3dm1f"
}, },
"outputs": [],
"source": [ "source": [
"!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n", "!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n",
"%cd GPT-SoVITS\n", "%cd GPT-SoVITS\n",
"!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n", "!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n",
"!pip install -r extra-req.txt --no-deps\n",
"!pip install -r requirements.txt" "!pip install -r requirements.txt"
], ]
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "0NgxXg5sjv7z"
},
"outputs": [],
"source": [ "source": [
"# @title Download pretrained models 下载预训练模型\n", "# @title Download pretrained models 下载预训练模型\n",
"!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n", "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
@ -53,16 +48,16 @@
"!git clone https://huggingface.co/Delik/uvr5_weights\n", "!git clone https://huggingface.co/Delik/uvr5_weights\n",
"!git config core.sparseCheckout true\n", "!git config core.sparseCheckout true\n",
"!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/" "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
], ]
"metadata": {
"id": "0NgxXg5sjv7z",
"cellView": "form"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "cPDEH-9czOJF"
},
"outputs": [],
"source": [ "source": [
"#@title Create folder models 创建文件夹模型\n", "#@title Create folder models 创建文件夹模型\n",
"import os\n", "import os\n",
@ -77,16 +72,16 @@
" print(f\"The folder '{folder_name}' was created successfully! (文件夹'{folder_name}'已成功创建!)\")\n", " print(f\"The folder '{folder_name}' was created successfully! (文件夹'{folder_name}'已成功创建!)\")\n",
"\n", "\n",
"print(\"All folders have been created. (所有文件夹均已创建。)\")" "print(\"All folders have been created. (所有文件夹均已创建。)\")"
], ]
"metadata": {
"cellView": "form",
"id": "cPDEH-9czOJF"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "vbZY-LnM0tzq"
},
"outputs": [],
"source": [ "source": [
"import requests\n", "import requests\n",
"import zipfile\n", "import zipfile\n",
@ -124,29 +119,35 @@
" shutil.move(source_path, destination_path)\n", " shutil.move(source_path, destination_path)\n",
"\n", "\n",
"print(f'Model downloaded. (模型已下载。)')" "print(f'Model downloaded. (模型已下载。)')"
], ]
"metadata": {
"cellView": "form",
"id": "vbZY-LnM0tzq"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "4oRGUzkrk8C7"
},
"outputs": [],
"source": [ "source": [
"# @title launch WebUI 启动WebUI\n", "# @title launch WebUI 启动WebUI\n",
"!/usr/local/bin/pip install ipykernel\n", "!/usr/local/bin/pip install ipykernel\n",
"!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n", "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
"%cd /content/GPT-SoVITS/\n", "%cd /content/GPT-SoVITS/\n",
"!/usr/local/bin/python webui.py" "!/usr/local/bin/python webui.py"
],
"metadata": {
"id": "4oRGUzkrk8C7",
"cellView": "form"
},
"execution_count": null,
"outputs": []
}
] ]
} }
],
"metadata": {
"accelerator": "GPU",
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -1,6 +1,5 @@
<div align="center"> <div align="center">
<h1>GPT-SoVITS-WebUI</h1> <h1>GPT-SoVITS-WebUI</h1>
A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br> A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br>
@ -77,6 +76,7 @@ bash install.sh
```bash ```bash
conda create -n GPTSoVits python=3.9 conda create -n GPTSoVits python=3.9
conda activate GPTSoVits conda activate GPTSoVits
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -105,6 +105,7 @@ Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWeb
Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only) Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only)
##### MacOS Users ##### MacOS Users
```bash ```bash
brew install ffmpeg brew install ffmpeg
``` ```
@ -112,6 +113,7 @@ brew install ffmpeg
#### Install Dependencies #### Install Dependencies
```bash ```bash
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -200,6 +202,7 @@ if you want to switch to V1,then
```bash ```bash
python webui.py v1 <language(optional)> python webui.py v1 <language(optional)>
``` ```
Or manually switch version in WebUI Or manually switch version in WebUI
### Finetune ### Finetune
@ -224,11 +227,13 @@ Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference
```bash ```bash
python GPT_SoVITS/inference_webui.py <language(optional)> python GPT_SoVITS/inference_webui.py <language(optional)>
``` ```
OR OR
```bash ```bash
python webui.py python webui.py
``` ```
then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference` then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
## V2 Release Notes ## V2 Release Notes
@ -243,7 +248,7 @@ New Features:
4. Improved synthesis quality for low-quality reference audio 4. Improved synthesis quality for low-quality reference audio
[more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [more details](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
Use v2 from v1 environment: Use v2 from v1 environment:
@ -263,7 +268,7 @@ New Features:
2. GPT model is more stable, with fewer repetitions and omissions, and it is easier to generate speech with richer emotional expression. 2. GPT model is more stable, with fewer repetitions and omissions, and it is easier to generate speech with richer emotional expression.
[more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [more details](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
Use v3 from v2 environment: Use v3 from v2 environment:
@ -275,7 +280,6 @@ Use v3 from v2 environment:
additional: for Audio Super Resolution model, you can read [how to download](./tools/AP_BWE_main/24kto48k/readme.txt) additional: for Audio Super Resolution model, you can read [how to download](./tools/AP_BWE_main/24kto48k/readme.txt)
## Todo List ## Todo List
- [x] **High Priority:** - [x] **High Priority:**
@ -297,15 +301,20 @@ Use v3 from v2 environment:
- [ ] model mix - [ ] model mix
## (Additional) Method for running from the command line ## (Additional) Method for running from the command line
Use the command line to open the WebUI for UVR5 Use the command line to open the WebUI for UVR5
``` ```
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5> python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
``` ```
<!-- If you can't open a browser, follow the format below for UVR processing,This is using mdxnet for audio processing <!-- If you can't open a browser, follow the format below for UVR processing,This is using mdxnet for audio processing
``` ```
python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
``` --> ``` -->
This is how the audio segmentation of the dataset is done using the command line This is how the audio segmentation of the dataset is done using the command line
``` ```
python audio_slicer.py \ python audio_slicer.py \
--input_path "<path_to_original_audio_file_or_directory>" \ --input_path "<path_to_original_audio_file_or_directory>" \
@ -315,16 +324,21 @@ python audio_slicer.py \
--min_interval <shortest_time_gap_between_adjacent_subclips> \ --min_interval <shortest_time_gap_between_adjacent_subclips> \
--hop_size <step_size_for_computing_volume_curve> --hop_size <step_size_for_computing_volume_curve>
``` ```
This is how dataset ASR processing is done using the command line (Chinese only) This is how dataset ASR processing is done using the command line (Chinese only)
``` ```
python tools/asr/funasr_asr.py -i <input> -o <output> python tools/asr/funasr_asr.py -i <input> -o <output>
``` ```
ASR processing is performed through Faster_Whisper (ASR labeling for languages other than Chinese) ASR processing is performed through Faster_Whisper (ASR labeling for languages other than Chinese)
(No progress bars, GPU performance may cause time delays) (No progress bars, GPU performance may cause time delays)
``` ```
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision> python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
``` ```
A custom list save path is enabled A custom list save path is enabled
## Credits ## Credits
@ -332,6 +346,7 @@ A custom list save path is enabled
Special thanks to the following projects and contributors: Special thanks to the following projects and contributors:
### Theoretical Research ### Theoretical Research
- [ar-vits](https://github.com/innnky/ar-vits) - [ar-vits](https://github.com/innnky/ar-vits)
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
- [vits](https://github.com/jaywalnut310/vits) - [vits](https://github.com/jaywalnut310/vits)
@ -341,17 +356,23 @@ Special thanks to the following projects and contributors:
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
- [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py) - [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py)
- [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py) - [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py)
### Pretrained Models ### Pretrained Models
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
- [BigVGAN](https://github.com/NVIDIA/BigVGAN) - [BigVGAN](https://github.com/NVIDIA/BigVGAN)
### Text Frontend for Inference ### Text Frontend for Inference
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
- [split-lang](https://github.com/DoodleBears/split-lang) - [split-lang](https://github.com/DoodleBears/split-lang)
- [g2pW](https://github.com/GitYCC/g2pW) - [g2pW](https://github.com/GitYCC/g2pW)
- [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW) - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
- [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw) - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
### WebUI Tools ### WebUI Tools
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
- [audio-slicer](https://github.com/openvpi/audio-slicer) - [audio-slicer](https://github.com/openvpi/audio-slicer)
- [SubFix](https://github.com/cronrpc/SubFix) - [SubFix](https://github.com/cronrpc/SubFix)

View File

@ -1,23 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
"id": "view-in-github", "colab_type": "text",
"colab_type": "text" "id": "view-in-github"
}, },
"source": [ "source": [
"<a href=\"https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" "<a href=\"https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
@ -25,18 +12,20 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"环境配置 environment"
],
"metadata": { "metadata": {
"id": "_o6a8GS2lWQM" "id": "_o6a8GS2lWQM"
} },
"source": [
"环境配置 environment"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": { "metadata": {
"id": "e9b7iFV3dm1f" "id": "e9b7iFV3dm1f"
}, },
"outputs": [],
"source": [ "source": [
"!pip install -q condacolab\n", "!pip install -q condacolab\n",
"# Setting up condacolab and installing packages\n", "# Setting up condacolab and installing packages\n",
@ -47,13 +36,17 @@
"!conda install -y -q -c pytorch -c nvidia cudatoolkit\n", "!conda install -y -q -c pytorch -c nvidia cudatoolkit\n",
"%cd -q /content/GPT-SoVITS\n", "%cd -q /content/GPT-SoVITS\n",
"!conda install -y -q -c conda-forge gcc gxx ffmpeg cmake -c pytorch -c nvidia\n", "!conda install -y -q -c conda-forge gcc gxx ffmpeg cmake -c pytorch -c nvidia\n",
"!/usr/local/bin/pip install -r extra-req.txt --no-deps\n",
"!/usr/local/bin/pip install -r requirements.txt" "!/usr/local/bin/pip install -r requirements.txt"
], ]
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0NgxXg5sjv7z"
},
"outputs": [],
"source": [ "source": [
"# @title Download pretrained models 下载预训练模型\n", "# @title Download pretrained models 下载预训练模型\n",
"!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n", "!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
@ -71,27 +64,35 @@
"!git clone https://huggingface.co/Delik/uvr5_weights\n", "!git clone https://huggingface.co/Delik/uvr5_weights\n",
"!git config core.sparseCheckout true\n", "!git config core.sparseCheckout true\n",
"!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/" "!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
], ]
"metadata": {
"id": "0NgxXg5sjv7z"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4oRGUzkrk8C7"
},
"outputs": [],
"source": [ "source": [
"# @title launch WebUI 启动WebUI\n", "# @title launch WebUI 启动WebUI\n",
"!/usr/local/bin/pip install ipykernel\n", "!/usr/local/bin/pip install ipykernel\n",
"!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n", "!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
"%cd /content/GPT-SoVITS/\n", "%cd /content/GPT-SoVITS/\n",
"!/usr/local/bin/python webui.py" "!/usr/local/bin/python webui.py"
],
"metadata": {
"id": "4oRGUzkrk8C7"
},
"execution_count": null,
"outputs": []
}
] ]
} }
],
"metadata": {
"accelerator": "GPU",
"colab": {
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -76,6 +76,7 @@ bash install.sh
```bash ```bash
conda create -n GPTSoVits python=3.9 conda create -n GPTSoVits python=3.9
conda activate GPTSoVits conda activate GPTSoVits
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -104,6 +105,7 @@ conda install -c conda-forge 'ffmpeg<7'
安装 [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) 环境(仅限韩语 TTS) 安装 [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) 环境(仅限韩语 TTS)
##### MacOS 用户 ##### MacOS 用户
```bash ```bash
brew install ffmpeg brew install ffmpeg
``` ```
@ -111,6 +113,7 @@ brew install ffmpeg
#### 安装依赖 #### 安装依赖
```bash ```bash
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -155,7 +158,6 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
- 建议在模型名称和配置文件名中**直接指定模型类型**,例如`mel_mand_roformer``bs_roformer`。如果未指定,将从配置文中比对特征,以确定它是哪种类型的模型。例如,模型`bs_roformer_ep_368_sdr_12.9628.ckpt` 和对应的配置文件`bs_roformer_ep_368_sdr_12.9628.yaml` 是一对。`kim_mel_band_roformer.ckpt``kim_mel_band_roformer.yaml` 也是一对。 - 建议在模型名称和配置文件名中**直接指定模型类型**,例如`mel_mand_roformer``bs_roformer`。如果未指定,将从配置文中比对特征,以确定它是哪种类型的模型。例如,模型`bs_roformer_ep_368_sdr_12.9628.ckpt` 和对应的配置文件`bs_roformer_ep_368_sdr_12.9628.yaml` 是一对。`kim_mel_band_roformer.ckpt``kim_mel_band_roformer.yaml` 也是一对。
4. 对于中文 ASR额外功能从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files)、[Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files) 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 目录中。 4. 对于中文 ASR额外功能从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files)、[Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files) 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 目录中。
5. 对于英语或日语 ASR额外功能从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将其放置在 `tools/asr/models` 目录中。此外,[其他模型](https://huggingface.co/Systran) 可能具有类似效果且占用更少的磁盘空间。 5. 对于英语或日语 ASR额外功能从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将其放置在 `tools/asr/models` 目录中。此外,[其他模型](https://huggingface.co/Systran) 可能具有类似效果且占用更少的磁盘空间。
@ -202,6 +204,7 @@ python webui.py <language(optional)>
```bash ```bash
python webui.py v1 <language(optional)> python webui.py v1 <language(optional)>
``` ```
或者在 webUI 内动态切换 或者在 webUI 内动态切换
### 微调 ### 微调
@ -226,11 +229,13 @@ python webui.py v1 <language(optional)>
```bash ```bash
python GPT_SoVITS/inference_webui.py <language(optional)> python GPT_SoVITS/inference_webui.py <language(optional)>
``` ```
或者 或者
```bash ```bash
python webui.py python webui.py
``` ```
然后在 `1-GPT-SoVITS-TTS/1C-推理` 中打开推理 webUI 然后在 `1-GPT-SoVITS-TTS/1C-推理` 中打开推理 webUI
## V2 发布说明 ## V2 发布说明
@ -245,7 +250,7 @@ python webui.py
4. 对低音质参考音频(尤其是来源于网络的高频严重缺失、听着很闷的音频)合成出来音质更好 4. 对低音质参考音频(尤其是来源于网络的高频严重缺失、听着很闷的音频)合成出来音质更好
详见[wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) 详见[wiki](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
从 v1 环境迁移至 v2 从 v1 环境迁移至 v2
@ -265,7 +270,7 @@ python webui.py
2. GPT 合成更稳定,重复漏字更少,也更容易跑出丰富情感 2. GPT 合成更稳定,重复漏字更少,也更容易跑出丰富情感
详见[wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) 详见[wiki](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
从 v2 环境迁移至 v3 从 v2 环境迁移至 v3
@ -277,7 +282,6 @@ python webui.py
如果想用音频超分功能缓解 v3 模型生成 24k 音频觉得闷的问题,需要下载额外的模型参数,参考[how to download](../../tools/AP_BWE_main/24kto48k/readme.txt) 如果想用音频超分功能缓解 v3 模型生成 24k 音频觉得闷的问题,需要下载额外的模型参数,参考[how to download](../../tools/AP_BWE_main/24kto48k/readme.txt)
## 待办事项清单 ## 待办事项清单
- [x] **高优先级:** - [x] **高优先级:**
@ -299,16 +303,21 @@ python webui.py
- [ ] 模型混合。 - [ ] 模型混合。
## (附加)命令行运行方式 ## (附加)命令行运行方式
使用命令行打开 UVR5 的 WebUI 使用命令行打开 UVR5 的 WebUI
````
```
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5> python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
```` ```
<!-- 如果打不开浏览器请按照下面的格式进行UVR处理这是使用mdxnet进行音频处理的方式 <!-- 如果打不开浏览器请按照下面的格式进行UVR处理这是使用mdxnet进行音频处理的方式
```` ````
python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
```` --> ```` -->
这是使用命令行完成数据集的音频切分的方式 这是使用命令行完成数据集的音频切分的方式
````
```
python audio_slicer.py \ python audio_slicer.py \
--input_path "<path_to_original_audio_file_or_directory>" \ --input_path "<path_to_original_audio_file_or_directory>" \
--output_root "<directory_where_subdivided_audio_clips_will_be_saved>" \ --output_root "<directory_where_subdivided_audio_clips_will_be_saved>" \
@ -316,17 +325,22 @@ python audio_slicer.py \
--min_length <minimum_duration_of_each_subclip> \ --min_length <minimum_duration_of_each_subclip> \
--min_interval <shortest_time_gap_between_adjacent_subclips> \ --min_interval <shortest_time_gap_between_adjacent_subclips> \
--hop_size <step_size_for_computing_volume_curve> --hop_size <step_size_for_computing_volume_curve>
```` ```
这是使用命令行完成数据集 ASR 处理的方式(仅限中文) 这是使用命令行完成数据集 ASR 处理的方式(仅限中文)
````
```
python tools/asr/funasr_asr.py -i <input> -o <output> python tools/asr/funasr_asr.py -i <input> -o <output>
```` ```
通过 Faster_Whisper 进行 ASR 处理(除中文之外的 ASR 标记) 通过 Faster_Whisper 进行 ASR 处理(除中文之外的 ASR 标记)
(没有进度条,GPU 性能可能会导致时间延迟) (没有进度条,GPU 性能可能会导致时间延迟)
``` ```
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision> python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
``` ```
启用自定义列表保存路径 启用自定义列表保存路径
## 致谢 ## 致谢
@ -334,6 +348,7 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p
特别感谢以下项目和贡献者: 特别感谢以下项目和贡献者:
### 理论研究 ### 理论研究
- [ar-vits](https://github.com/innnky/ar-vits) - [ar-vits](https://github.com/innnky/ar-vits)
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
- [vits](https://github.com/jaywalnut310/vits) - [vits](https://github.com/jaywalnut310/vits)
@ -343,17 +358,23 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
- [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py) - [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py)
- [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py) - [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py)
### 预训练模型 ### 预训练模型
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
- [BigVGAN](https://github.com/NVIDIA/BigVGAN) - [BigVGAN](https://github.com/NVIDIA/BigVGAN)
### 推理用文本前端 ### 推理用文本前端
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
- [split-lang](https://github.com/DoodleBears/split-lang) - [split-lang](https://github.com/DoodleBears/split-lang)
- [g2pW](https://github.com/GitYCC/g2pW) - [g2pW](https://github.com/GitYCC/g2pW)
- [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW) - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
- [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw) - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
### WebUI 工具 ### WebUI 工具
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
- [audio-slicer](https://github.com/openvpi/audio-slicer) - [audio-slicer](https://github.com/openvpi/audio-slicer)
- [SubFix](https://github.com/cronrpc/SubFix) - [SubFix](https://github.com/cronrpc/SubFix)

View File

@ -70,7 +70,7 @@ bash install.sh
```bash ```bash
conda create -n GPTSoVits python=3.9 conda create -n GPTSoVits python=3.9
conda activate GPTSoVits conda activate GPTSoVits
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -97,6 +97,7 @@ conda install -c conda-forge 'ffmpeg<7'
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートフォルダに置きます。 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) と [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) をダウンロードし、GPT-SoVITS のルートフォルダに置きます。
##### MacOS ユーザー ##### MacOS ユーザー
```bash ```bash
brew install ffmpeg brew install ffmpeg
``` ```
@ -104,6 +105,7 @@ brew install ffmpeg
#### 依存関係をインストールします #### 依存関係をインストールします
```bash ```bash
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -169,6 +171,7 @@ vocal_path|speaker_name|language|text
``` ```
D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin. D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
``` ```
## 微調整と推論 ## 微調整と推論
### WebUI を開く ### WebUI を開く
@ -189,6 +192,7 @@ V1に切り替えたい場合は
```bash ```bash
python webui.py v1 <言語(オプション)> python webui.py v1 <言語(オプション)>
``` ```
または WebUI で手動でバージョンを切り替えてください。 または WebUI で手動でバージョンを切り替えてください。
### 微調整 ### 微調整
@ -213,11 +217,13 @@ python webui.py v1 <言語(オプション)>
```bash ```bash
python GPT_SoVITS/inference_webui.py <言語(オプション)> python GPT_SoVITS/inference_webui.py <言語(オプション)>
``` ```
または または
```bash ```bash
python webui.py python webui.py
``` ```
その後、`1-GPT-SoVITS-TTS/1C-inference`で推論 webui を開きます。 その後、`1-GPT-SoVITS-TTS/1C-inference`で推論 webui を開きます。
## V2 リリースノート ## V2 リリースノート
@ -232,7 +238,7 @@ python webui.py
4. 低品質の参照音声に対する合成品質の向上 4. 低品質の参照音声に対する合成品質の向上
[詳細はこちら](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [詳細はこちら](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
V1 環境から V2 を使用するには: V1 環境から V2 を使用するには:
@ -252,7 +258,7 @@ V1環境からV2を使用するには:
2. GPT モデルがより安定し、繰り返しや省略が減少し、より豊かな感情表現を持つ音声の生成が容易になりました。 2. GPT モデルがより安定し、繰り返しや省略が減少し、より豊かな感情表現を持つ音声の生成が容易になりました。
[詳細情報はこちら](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [詳細情報はこちら](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
v2 環境から v3 を使用する方法: v2 環境から v3 を使用する方法:
@ -285,15 +291,20 @@ v2 環境から v3 を使用する方法:
- [ ] モデルミックス - [ ] モデルミックス
## (追加の) コマンドラインから実行する方法 ## (追加の) コマンドラインから実行する方法
コマンド ラインを使用して UVR5 の WebUI を開きます コマンド ラインを使用して UVR5 の WebUI を開きます
``` ```
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5> python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
``` ```
<!-- ブラウザを開けない場合は、以下の形式に従って UVR 処理を行ってください。これはオーディオ処理に mdxnet を使用しています。 <!-- ブラウザを開けない場合は、以下の形式に従って UVR 処理を行ってください。これはオーディオ処理に mdxnet を使用しています。
``` ```
python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
``` --> ``` -->
コマンド ラインを使用してデータセットのオーディオ セグメンテーションを行う方法は次のとおりです。 コマンド ラインを使用してデータセットのオーディオ セグメンテーションを行う方法は次のとおりです。
``` ```
python audio_slicer.py \ python audio_slicer.py \
--input_path "<path_to_original_audio_file_or_directory>" \ --input_path "<path_to_original_audio_file_or_directory>" \
@ -303,16 +314,21 @@ python audio_slicer.py \
--min_interval <shortest_time_gap_between_adjacent_subclips> --min_interval <shortest_time_gap_between_adjacent_subclips>
--hop_size <step_size_for_computing_volume_curve> --hop_size <step_size_for_computing_volume_curve>
``` ```
コマンドラインを使用してデータセット ASR 処理を行う方法です (中国語のみ) コマンドラインを使用してデータセット ASR 処理を行う方法です (中国語のみ)
``` ```
python tools/asr/funasr_asr.py -i <input> -o <output> python tools/asr/funasr_asr.py -i <input> -o <output>
``` ```
ASR 処理は Faster_Whisper を通じて実行されます(中国語を除く ASR マーキング) ASR 処理は Faster_Whisper を通じて実行されます(中国語を除く ASR マーキング)
(進行状況バーは表示されません。GPU のパフォーマンスにより時間遅延が発生する可能性があります) (進行状況バーは表示されません。GPU のパフォーマンスにより時間遅延が発生する可能性があります)
``` ```
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision> python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
``` ```
カスタムリストの保存パスが有効になっています カスタムリストの保存パスが有効になっています
## クレジット ## クレジット
@ -320,6 +336,7 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p
特に以下のプロジェクトと貢献者に感謝します: 特に以下のプロジェクトと貢献者に感謝します:
### 理論研究 ### 理論研究
- [ar-vits](https://github.com/innnky/ar-vits) - [ar-vits](https://github.com/innnky/ar-vits)
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
- [vits](https://github.com/jaywalnut310/vits) - [vits](https://github.com/jaywalnut310/vits)
@ -329,17 +346,23 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
- [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py) - [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py)
- [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py) - [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py)
### 事前学習モデル ### 事前学習モデル
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
- [BigVGAN](https://github.com/NVIDIA/BigVGAN) - [BigVGAN](https://github.com/NVIDIA/BigVGAN)
### 推論用テキストフロントエンド ### 推論用テキストフロントエンド
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
- [split-lang](https://github.com/DoodleBears/split-lang) - [split-lang](https://github.com/DoodleBears/split-lang)
- [g2pW](https://github.com/GitYCC/g2pW) - [g2pW](https://github.com/GitYCC/g2pW)
- [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW) - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
- [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw) - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
### WebUI ツール ### WebUI ツール
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
- [audio-slicer](https://github.com/openvpi/audio-slicer) - [audio-slicer](https://github.com/openvpi/audio-slicer)
- [SubFix](https://github.com/cronrpc/SubFix) - [SubFix](https://github.com/cronrpc/SubFix)

View File

@ -70,7 +70,7 @@ bash install.sh
```bash ```bash
conda create -n GPTSoVits python=3.9 conda create -n GPTSoVits python=3.9
conda activate GPTSoVits conda activate GPTSoVits
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -99,6 +99,7 @@ conda install -c conda-forge 'ffmpeg<7'
[Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) 설치 (Korean TTS 전용) [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) 설치 (Korean TTS 전용)
##### MacOS 사용자 ##### MacOS 사용자
```bash ```bash
brew install ffmpeg brew install ffmpeg
``` ```
@ -106,6 +107,7 @@ brew install ffmpeg
#### 의존성 설치 #### 의존성 설치
```bash ```bash
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -195,6 +197,7 @@ V1으로 전환하려면,
```bash ```bash
python webui.py v1 <언어(옵션)> python webui.py v1 <언어(옵션)>
``` ```
또는 WebUI에서 수동으로 버전을 전환하십시오. 또는 WebUI에서 수동으로 버전을 전환하십시오.
### 미세 조정 ### 미세 조정
@ -219,11 +222,13 @@ python webui.py v1 <언어(옵션)>
```bash ```bash
python GPT_SoVITS/inference_webui.py <언어(옵션)> python GPT_SoVITS/inference_webui.py <언어(옵션)>
``` ```
또는 또는
```bash ```bash
python webui.py python webui.py
``` ```
그런 다음 `1-GPT-SoVITS-TTS/1C-inference`에서 추론 webui를 엽니다. 그런 다음 `1-GPT-SoVITS-TTS/1C-inference`에서 추론 webui를 엽니다.
## V2 릴리스 노트 ## V2 릴리스 노트
@ -238,7 +243,7 @@ python webui.py
4. 저품질 참조 오디오에 대한 합성 품질 향상 4. 저품질 참조 오디오에 대한 합성 품질 향상
[자세한 내용](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [자세한 내용](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
V1 환경에서 V2를 사용하려면: V1 환경에서 V2를 사용하려면:
@ -258,7 +263,7 @@ V1 환경에서 V2를 사용하려면:
2. GPT 모델이 더 안정적이며 반복 및 생략이 적고, 더 풍부한 감정 표현을 가진 음성을 생성하기가 더 쉽습니다. 2. GPT 모델이 더 안정적이며 반복 및 생략이 적고, 더 풍부한 감정 표현을 가진 음성을 생성하기가 더 쉽습니다.
[자세한 내용](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [자세한 내용](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
v2 환경에서 v3 사용하기: v2 환경에서 v3 사용하기:
@ -270,7 +275,6 @@ v2 환경에서 v3 사용하기:
추가: 오디오 슈퍼 해상도 모델에 대해서는 [다운로드 방법](../../tools/AP_BWE_main/24kto48k/readme.txt)을 참고하세요. 추가: 오디오 슈퍼 해상도 모델에 대해서는 [다운로드 방법](../../tools/AP_BWE_main/24kto48k/readme.txt)을 참고하세요.
## 할 일 목록 ## 할 일 목록
- [x] **최우선순위:** - [x] **최우선순위:**
@ -293,15 +297,20 @@ v2 환경에서 v3 사용하기:
- [ ] 모델 블렌딩. - [ ] 모델 블렌딩.
## (추가적인) 명령줄에서 실행하는 방법 ## (추가적인) 명령줄에서 실행하는 방법
명령줄을 사용하여 UVR5용 WebUI 열기 명령줄을 사용하여 UVR5용 WebUI 열기
``` ```
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5> python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
``` ```
<!-- 브라우저를 열 수 없는 경우 UVR 처리를 위해 아래 형식을 따르십시오. 이는 오디오 처리를 위해 mdxnet을 사용하는 것입니다. <!-- 브라우저를 열 수 없는 경우 UVR 처리를 위해 아래 형식을 따르십시오. 이는 오디오 처리를 위해 mdxnet을 사용하는 것입니다.
``` ```
python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
``` --> ``` -->
명령줄을 사용하여 데이터세트의 오디오 분할을 수행하는 방법은 다음과 같습니다. 명령줄을 사용하여 데이터세트의 오디오 분할을 수행하는 방법은 다음과 같습니다.
``` ```
python audio_slicer.py \ python audio_slicer.py \
--input_path "<path_to_original_audio_file_or_directory>" \ --input_path "<path_to_original_audio_file_or_directory>" \
@ -311,16 +320,21 @@ python audio_slicer.py \
--min_interval <shortest_time_gap_between_adjacent_subclips> --min_interval <shortest_time_gap_between_adjacent_subclips>
--hop_size <step_size_for_computing_volume_curve> --hop_size <step_size_for_computing_volume_curve>
``` ```
명령줄을 사용하여 데이터 세트 ASR 처리를 수행하는 방법입니다(중국어만 해당). 명령줄을 사용하여 데이터 세트 ASR 처리를 수행하는 방법입니다(중국어만 해당).
``` ```
python tools/asr/funasr_asr.py -i <input> -o <output> python tools/asr/funasr_asr.py -i <input> -o <output>
``` ```
ASR 처리는 Faster_Whisper(중국어를 제외한 ASR 마킹)를 통해 수행됩니다. ASR 처리는 Faster_Whisper(중국어를 제외한 ASR 마킹)를 통해 수행됩니다.
(진행률 표시줄 없음, GPU 성능으로 인해 시간 지연이 발생할 수 있음) (진행률 표시줄 없음, GPU 성능으로 인해 시간 지연이 발생할 수 있음)
``` ```
python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision> python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p <precision>
``` ```
사용자 정의 목록 저장 경로가 활성화되었습니다. 사용자 정의 목록 저장 경로가 활성화되었습니다.
## 감사의 말 ## 감사의 말
@ -328,6 +342,7 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p
다음 프로젝트와 기여자들에게 특별히 감사드립니다: 다음 프로젝트와 기여자들에게 특별히 감사드립니다:
### 이론 연구 ### 이론 연구
- [ar-vits](https://github.com/innnky/ar-vits) - [ar-vits](https://github.com/innnky/ar-vits)
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
- [vits](https://github.com/jaywalnut310/vits) - [vits](https://github.com/jaywalnut310/vits)
@ -337,17 +352,23 @@ python ./tools/asr/fasterwhisper_asr.py -i <input> -o <output> -l <language> -p
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
- [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py) - [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py)
- [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py) - [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py)
### 사전 학습 모델 ### 사전 학습 모델
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
- [BigVGAN](https://github.com/NVIDIA/BigVGAN) - [BigVGAN](https://github.com/NVIDIA/BigVGAN)
### 추론용 텍스트 프론트엔드 ### 추론용 텍스트 프론트엔드
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
- [split-lang](https://github.com/DoodleBears/split-lang) - [split-lang](https://github.com/DoodleBears/split-lang)
- [g2pW](https://github.com/GitYCC/g2pW) - [g2pW](https://github.com/GitYCC/g2pW)
- [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW) - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
- [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw) - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
### WebUI 도구 ### WebUI 도구
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
- [audio-slicer](https://github.com/openvpi/audio-slicer) - [audio-slicer](https://github.com/openvpi/audio-slicer)
- [SubFix](https://github.com/cronrpc/SubFix) - [SubFix](https://github.com/cronrpc/SubFix)

View File

@ -72,7 +72,7 @@ bash install.sh
```bash ```bash
conda create -n GPTSoVits python=3.9 conda create -n GPTSoVits python=3.9
conda activate GPTSoVits conda activate GPTSoVits
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -99,6 +99,7 @@ conda install -c conda-forge 'ffmpeg<7'
[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) ve [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) dosyalarını indirin ve GPT-SoVITS kök dizinine yerleştirin. [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) ve [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) dosyalarını indirin ve GPT-SoVITS kök dizinine yerleştirin.
##### MacOS Kullanıcıları ##### MacOS Kullanıcıları
```bash ```bash
brew install ffmpeg brew install ffmpeg
``` ```
@ -106,6 +107,7 @@ brew install ffmpeg
#### Bağımlılıkları Yükleme #### Bağımlılıkları Yükleme
```bash ```bash
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@ -192,6 +194,7 @@ V1'e geçmek istiyorsanız,
```bash ```bash
python webui.py v1 <dil(isteğe bağlı)> python webui.py v1 <dil(isteğe bağlı)>
``` ```
veya WebUI'de manuel olarak sürüm değiştirin. veya WebUI'de manuel olarak sürüm değiştirin.
### İnce Ayar ### İnce Ayar
@ -216,11 +219,13 @@ veya WebUI'de manuel olarak sürüm değiştirin.
```bash ```bash
python GPT_SoVITS/inference_webui.py <dil(isteğe bağlı)> python GPT_SoVITS/inference_webui.py <dil(isteğe bağlı)>
``` ```
VEYA VEYA
```bash ```bash
python webui.py python webui.py
``` ```
ardından çıkarım webui'sini `1-GPT-SoVITS-TTS/1C-inference` adresinde açın. ardından çıkarım webui'sini `1-GPT-SoVITS-TTS/1C-inference` adresinde açın.
## V2 Sürüm Notları ## V2 Sürüm Notları
@ -235,7 +240,7 @@ Yeni Özellikler:
4. Düşük kaliteli referans sesler için geliştirilmiş sentez kalitesi 4. Düşük kaliteli referans sesler için geliştirilmiş sentez kalitesi
[detaylar burada](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [detaylar burada](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
V1 ortamından V2'yi kullanmak için: V1 ortamından V2'yi kullanmak için:
@ -255,7 +260,7 @@ V1 ortamından V2'yi kullanmak için:
2. GPT modeli daha **kararlı** hale geldi, tekrarlar ve atlamalar azaldı ve **daha zengin duygusal ifadeler** ile konuşma üretmek daha kolay hale geldi. 2. GPT modeli daha **kararlı** hale geldi, tekrarlar ve atlamalar azaldı ve **daha zengin duygusal ifadeler** ile konuşma üretmek daha kolay hale geldi.
[daha fazla detay](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) [daha fazla detay](<https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)>)
### v2 ortamında v3 kullanımı: ### v2 ortamında v3 kullanımı:
@ -288,15 +293,20 @@ V1 ortamından V2'yi kullanmak için:
- [ ] model karışımı - [ ] model karışımı
## (Ekstra) Komut satırından çalıştırma yöntemi ## (Ekstra) Komut satırından çalıştırma yöntemi
UVR5 için Web Arayüzünü açmak için komut satırını kullanın UVR5 için Web Arayüzünü açmak için komut satırını kullanın
``` ```
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5> python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
``` ```
<!-- Bir tarayıcı açamıyorsanız, UVR işleme için aşağıdaki formatı izleyin,Bu ses işleme için mdxnet kullanıyor <!-- Bir tarayıcı açamıyorsanız, UVR işleme için aşağıdaki formatı izleyin,Bu ses işleme için mdxnet kullanıyor
``` ```
python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
``` --> ``` -->
Veri setinin ses segmentasyonu komut satırı kullanılarak bu şekilde yapılır Veri setinin ses segmentasyonu komut satırı kullanılarak bu şekilde yapılır
``` ```
python audio_slicer.py \ python audio_slicer.py \
--input_path "<orijinal_ses_dosyası_veya_dizininin_yolu>" \ --input_path "<orijinal_ses_dosyası_veya_dizininin_yolu>" \
@ -306,16 +316,21 @@ python audio_slicer.py \
--min_interval <bitişik_alt_klipler_arasındaki_en_kısa_zaman_aralığı> --min_interval <bitişik_alt_klipler_arasındaki_en_kısa_zaman_aralığı>
--hop_size <ses_eğrisini_hesaplamak_için_adım_boyutu> --hop_size <ses_eğrisini_hesaplamak_için_adım_boyutu>
``` ```
Veri seti ASR işleme komut satırı kullanılarak bu şekilde yapılır (Yalnızca Çince) Veri seti ASR işleme komut satırı kullanılarak bu şekilde yapılır (Yalnızca Çince)
``` ```
python tools/asr/funasr_asr.py -i <girdi> -o <çıktı> python tools/asr/funasr_asr.py -i <girdi> -o <çıktı>
``` ```
ASR işleme Faster_Whisper aracılığıyla gerçekleştirilir (Çince dışındaki ASR işaretleme) ASR işleme Faster_Whisper aracılığıyla gerçekleştirilir (Çince dışındaki ASR işaretleme)
(İlerleme çubukları yok, GPU performansı zaman gecikmelerine neden olabilir) (İlerleme çubukları yok, GPU performansı zaman gecikmelerine neden olabilir)
``` ```
python ./tools/asr/fasterwhisper_asr.py -i <girdi> -o <çıktı> -l <dil> python ./tools/asr/fasterwhisper_asr.py -i <girdi> -o <çıktı> -l <dil>
``` ```
Özel bir liste kaydetme yolu etkinleştirildi Özel bir liste kaydetme yolu etkinleştirildi
## Katkı Verenler ## Katkı Verenler
@ -323,6 +338,7 @@ python ./tools/asr/fasterwhisper_asr.py -i <girdi> -o <çıktı> -l <dil>
Özellikle aşağıdaki projelere ve katkıda bulunanlara teşekkür ederiz: Özellikle aşağıdaki projelere ve katkıda bulunanlara teşekkür ederiz:
### Teorik Araştırma ### Teorik Araştırma
- [ar-vits](https://github.com/innnky/ar-vits) - [ar-vits](https://github.com/innnky/ar-vits)
- [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR) - [SoundStorm](https://github.com/yangdongchao/SoundStorm/tree/master/soundstorm/s1/AR)
- [vits](https://github.com/jaywalnut310/vits) - [vits](https://github.com/jaywalnut310/vits)
@ -332,17 +348,23 @@ python ./tools/asr/fasterwhisper_asr.py -i <girdi> -o <çıktı> -l <dil>
- [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41) - [fish-speech](https://github.com/fishaudio/fish-speech/blob/main/tools/llama/generate.py#L41)
- [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py) - [f5-TTS](https://github.com/SWivid/F5-TTS/blob/main/src/f5_tts/model/backbones/dit.py)
- [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py) - [shortcut flow matching](https://github.com/kvfrans/shortcut-models/blob/main/targets_shortcut.py)
### Önceden Eğitilmiş Modeller ### Önceden Eğitilmiş Modeller
- [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain) - [Chinese Speech Pretrain](https://github.com/TencentGameMate/chinese_speech_pretrain)
- [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large) - [Chinese-Roberta-WWM-Ext-Large](https://huggingface.co/hfl/chinese-roberta-wwm-ext-large)
- [BigVGAN](https://github.com/NVIDIA/BigVGAN) - [BigVGAN](https://github.com/NVIDIA/BigVGAN)
### Tahmin İçin Metin Ön Ucu ### Tahmin İçin Metin Ön Ucu
- [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization) - [paddlespeech zh_normalization](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
- [split-lang](https://github.com/DoodleBears/split-lang) - [split-lang](https://github.com/DoodleBears/split-lang)
- [g2pW](https://github.com/GitYCC/g2pW) - [g2pW](https://github.com/GitYCC/g2pW)
- [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW) - [pypinyin-g2pW](https://github.com/mozillazg/pypinyin-g2pW)
- [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw) - [paddlespeech g2pw](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw)
### WebUI Araçları ### WebUI Araçları
- [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui) - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui)
- [audio-slicer](https://github.com/openvpi/audio-slicer) - [audio-slicer](https://github.com/openvpi/audio-slicer)
- [SubFix](https://github.com/cronrpc/SubFix) - [SubFix](https://github.com/cronrpc/SubFix)

1
extra-req.txt Normal file
View File

@ -0,0 +1 @@
faster-whisper

View File

@ -27,7 +27,8 @@
"!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n", "!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n",
"%cd GPT-SoVITS\n", "%cd GPT-SoVITS\n",
"!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n", "!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n",
"!pip install -r requirements.txt" "!pip install -r requirements.txt\n",
"!pip install -r extra-req.txt --no-deps"
] ]
}, },
{ {

View File

@ -1,15 +1,17 @@
#!/bin/bash #!/bin/bash
set -e
# 安装构建工具 # 安装构建工具
# Install build tools # Install build tools
echo "Installing GCC..." echo "Installing GCC..."
conda install -c conda-forge gcc=14 conda install -c conda-forge gcc=14 -y
echo "Installing G++..." echo "Installing G++..."
conda install -c conda-forge gxx conda install -c conda-forge gxx -y
echo "Installing ffmpeg and cmake..." echo "Installing ffmpeg and cmake..."
conda install ffmpeg cmake conda install ffmpeg cmake -y
# 设置编译环境 # 设置编译环境
# Set up build environment # Set up build environment
@ -26,7 +28,6 @@ else
USE_CUDA=false USE_CUDA=false
fi fi
if [ "$USE_CUDA" = false ]; then if [ "$USE_CUDA" = false ]; then
echo "Checking for ROCm installation..." echo "Checking for ROCm installation..."
if [ -d "/opt/rocm" ]; then if [ -d "/opt/rocm" ]; then
@ -56,21 +57,53 @@ else
conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 cpuonly -c pytorch conda install pytorch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 cpuonly -c pytorch
fi fi
echo "Installing Python dependencies from requirements.txt..." echo "Installing Python dependencies from requirements.txt..."
# 刷新环境 # 刷新环境
# Refresh environment # Refresh environment
hash -r hash -r
# pyopenjtalk Installation
# Download the latest pyopenjtalk sdist from PyPI, rewrite the
# cmake_minimum_required(...) line of the bundled open_jtalk CMakeLists.txt,
# repack the tree and install from the patched tarball.
# NOTE(review): the version range is presumably widened so that newer CMake
# releases accept the bundled project -- confirm against upstream.
conda install jq -y

# `uname` prints the kernel name with a leading capital ("Darwin", "Linux").
OS_TYPE=$(uname)
PACKAGE_NAME="pyopenjtalk"

# Resolve the latest released version. Without `pipefail`, a failing curl does
# not abort the script, so validate the result explicitly.
VERSION=$(curl -s "https://pypi.org/pypi/$PACKAGE_NAME/json" | jq -r .info.version)
if [ -z "$VERSION" ] || [ "$VERSION" = "null" ]; then
    echo "Failed to resolve the latest $PACKAGE_NAME version from PyPI" >&2
    exit 1
fi

# Derive the file name from the resolved version instead of globbing with `ls`,
# which returns several names when a stale tarball is lying around.
TAR_FILE="$PACKAGE_NAME-$VERSION.tar.gz"
DIR_NAME="${TAR_FILE%.tar.gz}"

wget -O "$TAR_FILE" "https://files.pythonhosted.org/packages/source/${PACKAGE_NAME:0:1}/$PACKAGE_NAME/$TAR_FILE"
tar -xzf "$TAR_FILE"
rm "$TAR_FILE"

CMAKE_FILE="$DIR_NAME/lib/open_jtalk/src/CMakeLists.txt"

# BSD sed (macOS) requires an explicit, possibly empty, backup suffix after -i;
# GNU sed does not accept a separate empty argument there.
if [[ "$OS_TYPE" == "Darwin"* ]]; then
    sed -i '' -E 's/cmake_minimum_required\(VERSION[^\)]*\)/cmake_minimum_required(VERSION 3.5...3.31)/' "$CMAKE_FILE"
else
    sed -i -E 's/cmake_minimum_required\(VERSION[^\)]*\)/cmake_minimum_required(VERSION 3.5...3.31)/' "$CMAKE_FILE"
fi

# Repack the patched tree and install from it, then clean up.
tar -czf "$TAR_FILE" "$DIR_NAME"
pip install "$TAR_FILE"
rm -rf "$TAR_FILE" "$DIR_NAME"
pip install -r extra-req.txt --no-deps
pip install -r requirements.txt pip install -r requirements.txt
if [ "$USE_ROCM" = true ] && [ "$IS_WSL" = true ]; then if [ "$USE_ROCM" = true ] && [ "$IS_WSL" = true ]; then
echo "Update to WSL compatible runtime lib..." echo "Update to WSL compatible runtime lib..."
location=`pip show torch | grep Location | awk -F ": " '{print $2}'` location=$(pip show torch | grep Location | awk -F ": " '{print $2}')
cd ${location}/torch/lib/ cd "${location}"/torch/lib/ || exit
rm libhsa-runtime64.so* rm libhsa-runtime64.so*
cp /opt/rocm/lib/libhsa-runtime64.so.1.2 libhsa-runtime64.so cp /opt/rocm/lib/libhsa-runtime64.so.1.2 libhsa-runtime64.so
fi fi
echo "Installation completed successfully!" echo "Installation completed successfully!"

View File

@ -3,7 +3,7 @@ scipy
tensorboard tensorboard
librosa==0.9.2 librosa==0.9.2
numba==0.56.4 numba==0.56.4
pytorch-lightning pytorch-lightning>2.0
gradio>=4.0,<=4.24.0 gradio>=4.0,<=4.24.0
ffmpeg-python ffmpeg-python
onnxruntime; sys_platform == 'darwin' onnxruntime; sys_platform == 'darwin'
@ -26,7 +26,6 @@ jieba_fast
jieba jieba
split-lang split-lang
fast_langdetect>=0.3.0 fast_langdetect>=0.3.0
Faster_Whisper
wordsegment wordsegment
rotary_embedding_torch rotary_embedding_torch
ToJyutping ToJyutping
@ -38,4 +37,9 @@ python_mecab_ko; sys_platform != 'win32'
fastapi<0.112.2 fastapi<0.112.2
x_transformers x_transformers
torchmetrics<=1.5 torchmetrics<=1.5
attrdict pydantic<=2.10.6
ctranslate2>=4.0,<5
huggingface_hub>=0.13
tokenizers>=0.13,<1
av>=11
tqdm