diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index c2c65a1..d8c5ff5 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -217,13 +217,13 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
version, model_version, if_lora_v3=get_sovits_version_from_path_fast(sovits_path)
# print(sovits_path,version, model_version, if_lora_v3)
if if_lora_v3==True and is_exist_s2gv3==False:
- info=i18n("GPT_SoVITS/pretrained_models/s2Gv3.pth v3sovits的底模没下载对,识别为v3sovits的lora没法加载")
+ info= "GPT_SoVITS/pretrained_models/s2Gv3.pth" + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
gr.Warning(info)
raise FileExistsError(info)
dict_language = dict_language_v1 if version =='v1' else dict_language_v2
if prompt_language is not None and text_language is not None:
if prompt_language in list(dict_language.keys()):
- prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
+ prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
else:
prompt_text_update = {'__type__':'update', 'value':''}
prompt_language_update = {'__type__':'update', 'value':i18n("中文")}
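Editor's note on the hunk above: the fix splits a message that mixed a file path into its i18n key, keeping the path as a plain literal and the translatable sentence as a clean key. A minimal sketch of the assumed key-based lookup (the `TRANSLATIONS` table is illustrative, not the repo's locale files):

```python
# Minimal sketch of the assumed i18n mechanism: translations are keyed by the
# exact source string, so embedding a variable path in the key would always
# miss the dictionary. The table below is illustrative only.
TRANSLATIONS = {
    "SoVITS V3 底模缺失,无法加载相应 LoRA 权重":
        "SoVITS V3 base model is missing; the corresponding LoRA weights cannot be loaded",
}

def i18n(key: str) -> str:
    return TRANSLATIONS.get(key, key)  # fall back to the untranslated key

info = "GPT_SoVITS/pretrained_models/s2Gv3.pth " + i18n("SoVITS V3 底模缺失,无法加载相应 LoRA 权重")
print(info)
```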
@@ -534,7 +534,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
print(i18n("实际输入的参考文本:"), prompt_text)
text = text.strip("\n")
# if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
-
+
print(i18n("实际输入的目标文本:"), text)
zero_wav = np.zeros(
int(hps.data.sampling_rate * pause_second),
@@ -864,7 +864,7 @@ def html_left(text, label='p'):
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(
- value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
+ value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + "
" + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
)
with gr.Group():
gr.Markdown(html_center(i18n("模型切换"),'h3'))
@@ -877,8 +877,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.Row():
inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13)
with gr.Column(scale=13):
- ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。v3暂不支持该模式,使用了会报错。"), value=False, interactive=True, show_label=True,scale=1)
- gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开。
开启后无视填写的参考文本。")))
+ ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。")+i18n("v3暂不支持该模式,使用了会报错。"), value=False, interactive=True, show_label=True,scale=1)
+ gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT")+"<br>"+i18n("听不清参考音频说的啥(不晓得写啥)可以开。开启后无视填写的参考文本。")))
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5,scale=1)
with gr.Column(scale=14):
prompt_language = gr.Dropdown(
@@ -909,7 +909,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(html_center(i18n("GPT采样参数(无参考文本时不要太低。不懂就用默认):")))
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=15,interactive=True, scale=1)
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True, scale=1)
- temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True, scale=1)
+ temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True, scale=1)
# with gr.Column():
# gr.Markdown(value=i18n("手工调整音素。当音素框不为空时使用手工音素输入推理,无视目标文本框。"))
# phoneme=gr.Textbox(label=i18n("音素框"), value="")
diff --git a/GPT_SoVITS/inference_webui_fast.py b/GPT_SoVITS/inference_webui_fast.py
index dcc2bcf..5a6910d 100644
--- a/GPT_SoVITS/inference_webui_fast.py
+++ b/GPT_SoVITS/inference_webui_fast.py
@@ -42,7 +42,7 @@ sovits_path = os.environ.get("sovits_path", None)
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
bert_path = os.environ.get("bert_path", None)
version=os.environ.get("version","v2")
-
+
import gradio as gr
from TTS_infer_pack.TTS import TTS, TTS_Config
from TTS_infer_pack.text_segmentation_method import get_method
@@ -61,7 +61,7 @@ if torch.cuda.is_available():
# device = "mps"
else:
device = "cpu"
-
+
dict_language_v1 = {
i18n("中文"): "all_zh",#全部按中文识别
i18n("英文"): "en",#全部按英文识别#######不变
@@ -106,20 +106,20 @@ if cnhubert_base_path is not None:
tts_config.cnhuhbert_base_path = cnhubert_base_path
if bert_path is not None:
tts_config.bert_base_path = bert_path
-
+
print(tts_config)
tts_pipeline = TTS(tts_config)
gpt_path = tts_config.t2s_weights_path
sovits_path = tts_config.vits_weights_path
version = tts_config.version
-def inference(text, text_lang,
- ref_audio_path,
+def inference(text, text_lang,
+ ref_audio_path,
aux_ref_audio_paths,
- prompt_text,
- prompt_lang, top_k,
- top_p, temperature,
- text_split_method, batch_size,
+ prompt_text,
+ prompt_lang, top_k,
+ top_p, temperature,
+ text_split_method, batch_size,
speed_factor, ref_text_free,
split_bucket,fragment_interval,
seed, keep_random, parallel_infer,
@@ -150,7 +150,7 @@ def inference(text, text_lang,
}
for item in tts_pipeline.run(inputs):
yield item, actual_seed
-
+
def custom_sort_key(s):
# 使用正则表达式提取字符串中的数字部分和非数字部分
parts = re.split('(\d+)', s)
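The `custom_sort_key` context above implements natural sorting. A self-contained sketch of the same idea, using a raw-string pattern to avoid the escape warning that the bare `'(\d+)'` literal triggers on recent Python:

```python
# Natural-sort sketch: split on digit runs so numeric parts compare as
# integers, e.g. "model2" sorts before "model10".
import re

def natural_key(s: str):
    parts = re.split(r"(\d+)", s)  # raw string avoids the '\d' escape warning
    return [int(p) if p.isdigit() else p.lower() for p in parts]

print(sorted(["model10.pth", "model2.pth"], key=natural_key))
# -> ['model2.pth', 'model10.pth']
```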
@@ -201,7 +201,7 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
dict_language = dict_language_v1 if tts_pipeline.configs.version =='v1' else dict_language_v2
if prompt_language is not None and text_language is not None:
if prompt_language in list(dict_language.keys()):
- prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
+ prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
else:
prompt_text_update = {'__type__':'update', 'value':''}
prompt_language_update = {'__type__':'update', 'value':i18n("中文")}
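The `{'__type__': 'update'}` dicts above are Gradio's dictionary form of a component update; `gr.update()` builds the same structure. A hedged sketch with a hypothetical handler (not code from this repo):

```python
# Sketch of the Gradio update protocol: a callback can return gr.update(...)
# (equivalent to a {'__type__': 'update', ...} dict) to patch properties of
# an existing component instead of rebuilding it.
import gradio as gr

def on_language_known(lang: str):
    # Keep the dropdown but set its value, mirroring prompt_language_update.
    return gr.update(value=lang)
```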
@@ -216,9 +216,9 @@ def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(
- value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
+ value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") + "
" + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
)
-
+
with gr.Column():
# with gr.Group():
gr.Markdown(value=i18n("模型切换"))
@@ -228,7 +228,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
-
+
with gr.Row():
with gr.Column():
gr.Markdown(value=i18n("*请上传并填写参考信息"))
@@ -242,8 +242,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
)
with gr.Column():
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
- gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"))
-
+ gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT")+"
"+i18n("听不清参考音频说的啥(不晓得写啥)可以开。开启后无视填写的参考文本。"))
+
with gr.Column():
gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=20, max_lines=20)
@@ -251,7 +251,7 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
label=i18n("需要合成的文本的语种"), choices=list(dict_language.keys()), value=i18n("中文")
)
-
+
with gr.Group():
gr.Markdown(value=i18n("推理设置"))
with gr.Row():
@@ -274,8 +274,8 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
)
parallel_infer = gr.Checkbox(label=i18n("并行推理"), value=True, interactive=True, show_label=True)
split_bucket = gr.Checkbox(label=i18n("数据分桶(并行推理时会降低一点计算量)"), value=True, interactive=True, show_label=True)
-
- with gr.Row():
+
+ with gr.Row():
seed = gr.Number(label=i18n("随机种子"),value=-1)
keep_random = gr.Checkbox(label=i18n("保持随机"), value=True, interactive=True, show_label=True)
@@ -283,15 +283,15 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.Row():
inference_button = gr.Button(i18n("合成语音"), variant="primary")
stop_infer = gr.Button(i18n("终止合成"), variant="primary")
-
-
+
+
inference_button.click(
inference,
[
text,text_language, inp_ref, inp_refs,
- prompt_text, prompt_language,
- top_k, top_p, temperature,
- how_to_cut, batch_size,
+ prompt_text, prompt_language,
+ top_k, top_p, temperature,
+ how_to_cut, batch_size,
speed_factor, ref_text_free,
split_bucket,fragment_interval,
seed, keep_random, parallel_infer,
@@ -315,13 +315,13 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
interactive=True,
)
cut_text= gr.Button(i18n("切分"), variant="primary")
-
+
def to_cut(text_inp, how_to_cut):
if len(text_inp.strip()) == 0 or text_inp==[]:
return ""
method = get_method(cut_method[how_to_cut])
return method(text_inp)
-
+
text_opt = gr.Textbox(label=i18n("切分后文本"), value="", lines=4)
cut_text.click(to_cut, [text_inp, _how_to_cut], [text_opt])
gr.Markdown(value=i18n("后续将支持转音素、手工修改音素、语音合成分步执行。"))
diff --git a/README.md b/README.md
index ec8129d..adc1344 100644
--- a/README.md
+++ b/README.md
@@ -121,9 +121,7 @@ pip install -r requirements.txt
0. Regarding image tags: Due to rapid updates in the codebase and the slow process of packaging and testing images, please check [Docker Hub](https://hub.docker.com/r/breakstring/gpt-sovits) for the currently packaged latest images and select as per your situation, or alternatively, build locally using a Dockerfile according to your own needs.
1. Environment Variables:
-
-- is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation.
-
+ - is_half: Controls half-precision/double-precision. This is typically the cause if the content under the directories 4-cnhubert/5-wav32k is not generated correctly during the "SSL extracting" step. Adjust to True or False based on your actual situation.
2. Volumes Configuration: The application's root directory inside the container is set to /workspace. The default docker-compose.yaml lists some practical examples for uploading/downloading content.
3. shm_size: The default available memory for Docker Desktop on Windows is too small, which can cause abnormal operations. Adjust according to your own situation.
4. Under the deploy section, GPU-related settings should be adjusted cautiously according to your system and actual circumstances.
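Since environment variables reach the container as strings, the app must parse is_half before picking a precision. A hedged sketch of that parsing (names follow the README; the repo's exact parsing may differ):

```python
# Illustrative parsing of the is_half environment variable described above.
import os

is_half = os.environ.get("is_half", "True").strip().lower() == "true"
dtype = "float16" if is_half else "float32"
print(f"SSL extraction will run in {dtype}")
```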
@@ -158,7 +156,7 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
4. For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
-5. For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have the similar effect with smaller disk footprint.
+5. For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have a similar effect with a smaller disk footprint.
## Dataset Format
@@ -175,7 +173,7 @@ Language dictionary:
- 'en': English
- 'ko': Korean
- 'yue': Cantonese
-
+
Example:
```
@@ -184,61 +182,56 @@ D:\GPT-SoVITS\xxx/xxx.wav|xxx|en|I like playing Genshin.
## Finetune and inference
- ### Open WebUI
+### Open WebUI
- #### Integrated Package Users
+#### Integrated Package Users
- Double-click `go-webui.bat`or use `go-webui.ps1`
- if you want to switch to V1,then double-click`go-webui-v1.bat` or use `go-webui-v1.ps1`
+Double-click `go-webui.bat` or use `go-webui.ps1`.
+If you want to switch to V1, then double-click `go-webui-v1.bat` or use `go-webui-v1.ps1`.
- #### Others
+#### Others
- ```bash
- python webui.py
- ```
+```bash
+python webui.py
+```
- if you want to switch to V1,then
+If you want to switch to V1, then
- ```bash
- python webui.py v1
- ```
+```bash
+python webui.py v1
+```
Or manually switch the version in the WebUI
- ### Finetune
+### Finetune
- #### Path Auto-filling is now supported
+#### Path Auto-filling is now supported
- 1.Fill in the audio path
+ 1. Fill in the audio path
+ 2. Slice the audio into small chunks
+ 3. Denoise (optional)
+ 4. ASR
+ 5. Proofread ASR transcriptions
+ 6. Go to the next Tab, then finetune the model
- 2.Slice the audio into small chunks
+### Open Inference WebUI
- 3.Denoise(optinal)
+#### Integrated Package Users
- 4.ASR
+Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1`, then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
- 5.Proofreading ASR transcriptions
+#### Others
- 6.Go to the next Tab, then finetune the model
+```bash
+python GPT_SoVITS/inference_webui.py
+```
+OR
- ### Open Inference WebUI
-
- #### Integrated Package Users
-
- Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
-
- #### Others
-
- ```bash
- python GPT_SoVITS/inference_webui.py
- ```
- OR
-
- ```bash
- python webui.py
- ```
+```bash
+python webui.py
+```
then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
- ## V2 Release Notes
+## V2 Release Notes
New Features:
@@ -248,11 +241,11 @@ New Features:
3. Pre-trained model extended from 2k hours to 5k hours
-4. Improved synthesis quality for low-quality reference audio
+4. Improved synthesis quality for low-quality reference audio
- [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) )
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
-Use v2 from v1 environment:
+Use v2 from v1 environment:
1. `pip install -r requirements.txt` to update some packages
@@ -262,7 +255,7 @@ Use v2 from v1 environment:
Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip) (Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.)
- ## V3 Release Notes
+## V3 Release Notes
New Features:
@@ -270,9 +263,9 @@ New Features:
2. GPT model is more stable, with fewer repetitions and omissions, and it is easier to generate speech with richer emotional expression.
- [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) )
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
-Use v3 from v2 environment:
+Use v3 from v2 environment:
1. `pip install -r requirements.txt` to update some packages
@@ -310,7 +303,7 @@ python tools/uvr5/webui.py ""
```
This is how the audio segmentation of the dataset is done using the command line
```
@@ -319,7 +312,7 @@ python audio_slicer.py \
--output_root "" \
--threshold \
--min_length \
- --min_interval
+ --min_interval
--hop_size
```
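A concrete invocation of the slicer above may help; every path and numeric value below is illustrative, not a recommended setting:

```python
# Hypothetical wrapper around the audio_slicer.py CLI shown above. Paths and
# numbers are placeholders chosen for demonstration only.
import subprocess

subprocess.run(
    [
        "python", "audio_slicer.py",
        "--input_path", "dataset/raw/speaker1.wav",  # hypothetical input
        "--output_root", "dataset/sliced",           # hypothetical output dir
        "--threshold", "-34",                        # volume threshold (dB)
        "--min_length", "4000",                      # min subclip length (ms)
        "--min_interval", "300",                     # min gap between clips (ms)
        "--hop_size", "10",                          # volume-curve step (ms)
    ],
    check=True,
)
```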
This is how dataset ASR processing is done using the command line (Chinese only)
diff --git a/docs/cn/README.md b/docs/cn/README.md
index 0bf7031..6196099 100644
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@@ -11,7 +11,7 @@
[Colab](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
[License](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[Huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
+[Huggingface](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
[Discord](https://discord.gg/dnrgs5GHfG)
[**English**](../../README.md) | **中文简体** | [**日本語**](../ja/README.md) | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
@@ -208,17 +208,12 @@ python webui.py v1
#### 现已支持自动填充路径
- 1.填入训练音频路径
-
- 2.切割音频
-
- 3.进行降噪(可选)
-
- 4.进行ASR
-
- 5.校对标注
-
- 6.前往下一个窗口,点击训练
+ 1. 填入训练音频路径
+ 2. 切割音频
+ 3. 进行降噪(可选)
+ 4. 进行ASR
+ 5. 校对标注
+ 6. 前往下一个窗口,点击训练
### 打开推理WebUI
@@ -271,7 +266,7 @@ python webui.py
2. GPT合成更稳定,重复漏字更少,也更容易跑出丰富情感
详见[wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
-
+
从v2环境迁移至v3
1. 需要pip安装requirements.txt更新环境
@@ -280,7 +275,7 @@ python webui.py
3. 从[huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)下载这些v3新增预训练模型 (s1v3.ckpt, s2Gv3.pth and models--nvidia--bigvgan_v2_24khz_100band_256x folder)将他们放到`GPT_SoVITS\pretrained_models`目录下
- 如果想用音频超分功能缓解v3模型生成24k音频觉得闷的问题,需要下载额外的模型参数,参考[how to download](./tools/AP_BWE_main/24kto48k/readme.txt)
+ 如果想用音频超分功能缓解v3模型生成24k音频觉得闷的问题,需要下载额外的模型参数,参考[how to download](../../tools/AP_BWE_main/24kto48k/readme.txt)
## 待办事项清单
@@ -310,7 +305,7 @@ python tools/uvr5/webui.py ""
````
这是使用命令行完成数据集的音频切分的方式
````
@@ -319,7 +314,7 @@ python audio_slicer.py \
--output_root "" \
--threshold \
--min_length \
- --min_interval
+ --min_interval
--hop_size
````
这是使用命令行完成数据集ASR处理的方式(仅限中文)
diff --git a/docs/en/Changelog_EN.md b/docs/en/Changelog_EN.md
index 87c59a6..3c3fe18 100644
--- a/docs/en/Changelog_EN.md
+++ b/docs/en/Changelog_EN.md
@@ -186,4 +186,37 @@
### 20250211 Update
-1. [Wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) Added GPT-SoVITS v3 Model.
\ No newline at end of file
+- [Wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) Added the GPT-SoVITS v3 model; fine-tuning SoVITS v3 requires 14GB of GPU memory.
+
+### 20250212 Update
+
+- [PR 2040](https://github.com/RVC-Boss/GPT-SoVITS/pull/2040) Added gradient checkpointing for SoVITS v3 fine-tuning, which now requires 12GB of GPU memory.
+
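For context, gradient checkpointing trades compute for memory by recomputing activations during the backward pass instead of storing them. A generic PyTorch sketch, not the PR's actual integration into the SoVITS v3 trainer:

```python
# Generic gradient-checkpointing sketch (illustrative, standalone).
import torch
from torch.utils.checkpoint import checkpoint

class Block(torch.nn.Module):
    def __init__(self, dim: int = 256):
        super().__init__()
        self.ff = torch.nn.Sequential(torch.nn.Linear(dim, dim), torch.nn.GELU())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Activations inside self.ff are recomputed in backward rather than
        # kept alive, lowering peak memory at the cost of extra compute.
        return checkpoint(self.ff, x, use_reentrant=False)

y = Block()(torch.randn(4, 256, requires_grad=True))
y.sum().backward()
```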
+### 20250214 Update
+
+- [PR 2047](https://github.com/RVC-Boss/GPT-SoVITS/pull/2047) Optimize the multilingual mixed text segmentation strategy **A**.
+  - Added `split-lang` as a language segmentation tool to improve segmentation capabilities for multi-language mixed text.
+
+### 20250217 Update
+
+- [PR 2062](https://github.com/RVC-Boss/GPT-SoVITS/pull/2062) Optimize the logic for handling numbers and English in the text.
+
+### 20250218 Update
+
+- [PR 2073](https://github.com/RVC-Boss/GPT-SoVITS/pull/2073) Optimize the multilingual mixed text segmentation strategy **B**.
+
+### 20250223 Update
+
+1. LoRA training is supported for fine-tuning SoVITS V3. It requires 8GB of GPU memory and gives better results than full-parameter fine-tuning.
+2. [PR 2078](https://github.com/RVC-Boss/GPT-SoVITS/pull/2078) Added Mel Band RoFormer model for Vocal & Instrument Separation.
+
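As background on why LoRA fine-tuning fits in 8GB: only a low-rank adapter is trained while the pretrained weights stay frozen, so gradients and optimizer state exist only for the adapter. A generic sketch, not the repo's v3 LoRA implementation:

```python
# Generic LoRA sketch: the frozen base weight W gets a trainable low-rank
# update B @ A, scaled by alpha / r. Illustrative only.
import torch

class LoRALinear(torch.nn.Module):
    def __init__(self, base: torch.nn.Linear, r: int = 8, alpha: float = 16.0):
        super().__init__()
        self.base = base.requires_grad_(False)  # freeze pretrained weights
        self.A = torch.nn.Parameter(torch.randn(r, base.in_features) * 0.01)
        self.B = torch.nn.Parameter(torch.zeros(base.out_features, r))  # zero init: starts as the base model
        self.scaling = alpha / r

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + (x @ self.A.T @ self.B.T) * self.scaling

layer = LoRALinear(torch.nn.Linear(256, 256))
out = layer(torch.randn(4, 256))
```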
+### 20250226 Update
+
+1. [PR 2112](https://github.com/RVC-Boss/GPT-SoVITS/pull/2112) Fix issues caused by non-English directories in Windows.
+ - Using `langsegmenter` for Korean.
+2. [PR 2114](https://github.com/RVC-Boss/GPT-SoVITS/pull/2114) Fix issues caused by non-English directories in Windows.
+ - Using `langsegmenter` for Korean/Japanese.
+
+### 20250227 Update
+
+- Added 24K to 48K audio super-resolution models to alleviate the muffled issue when generating 24K audio with V3 model, as reported in [Issue 2085](https://github.com/RVC-Boss/GPT-SoVITS/issues/2085), [Issue 2117](https://github.com/RVC-Boss/GPT-SoVITS/issues/2117).
\ No newline at end of file
diff --git a/docs/ja/Changelog_JA.md b/docs/ja/Changelog_JA.md
index 0a70d71..53c28ff 100644
--- a/docs/ja/Changelog_JA.md
+++ b/docs/ja/Changelog_JA.md
@@ -185,4 +185,37 @@
### 20250211 更新
-1. [Wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) GPT-SoVITS v3 モデルを追加しました。
\ No newline at end of file
+1. [Wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7)) GPT-SoVITS v3 モデルを追加しました。SoVITS v3のファインチューニングには14GBのGPUメモリが必要です。
+
+### 20250212 更新
+
+- [PR 2040](https://github.com/RVC-Boss/GPT-SoVITS/pull/2040) SoVITS v3のファインチューニングに勾配チェックポイント(gradient checkpointing)を追加、12GBのGPUメモリが必要です。
+
+### 20250214 更新
+
+- [PR 2047](https://github.com/RVC-Boss/GPT-SoVITS/pull/2047) 多言語混合テキスト分割戦略の最適化 **A**。
+ - `split-lang`を言語分割ツールとして追加し、多言語混合テキストの分割能力を向上させました。
+
+### 20250217 更新
+
+- [PR 2062](https://github.com/RVC-Boss/GPT-SoVITS/pull/2062) テキスト内の数字と英語の処理ロジックを最適化。
+
+### 20250218 更新
+
+- [PR 2073](https://github.com/RVC-Boss/GPT-SoVITS/pull/2073) 多言語混合テキスト分割戦略の最適化 **B**。
+
+### 20250223 更新
+
+1. LoRAトレーニングがSoVITS V3のファインチューニングに対応しました。8GBのGPUメモリが必要で、結果はフルパラメータファインチューニングより優れています。
+2. [PR 2078](https://github.com/RVC-Boss/GPT-SoVITS/pull/2078) ボーカルと楽器分離のためにMel Band RoFormerモデルを追加しました。
+
+### 20250226 更新
+
+1. [PR 2112](https://github.com/RVC-Boss/GPT-SoVITS/pull/2112) Windowsでの非英語ディレクトリによる問題を修正しました。
+ - `langsegmenter`を使用して韓国語の問題を修正。
+2. [PR 2114](https://github.com/RVC-Boss/GPT-SoVITS/pull/2114) Windowsでの非英語ディレクトリによる問題を修正しました。
+ - `langsegmenter`を使用して韓国語/日本語の問題を修正。
+
+### 20250227 更新
+
+- V3モデルで24Kオーディオを生成する際に発生するこもった音の問題を緩和するために、24Kから48Kのオーディオ超解像モデルを追加しました。[Issue 2085](https://github.com/RVC-Boss/GPT-SoVITS/issues/2085)、[Issue 2117](https://github.com/RVC-Boss/GPT-SoVITS/issues/2117)で報告されています。
\ No newline at end of file
diff --git a/docs/ja/README.md b/docs/ja/README.md
index c8ca431..8c815e8 100644
--- a/docs/ja/README.md
+++ b/docs/ja/README.md
@@ -9,7 +9,7 @@
[Colab](https://colab.research.google.com/github/RVC-Boss/GPT-SoVITS/blob/main/colab_webui.ipynb)
[License](https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE)
-[Huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main)
+[Huggingface](https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2)
[Discord](https://discord.gg/dnrgs5GHfG)
[**English**](../../README.md) | [**中文简体**](../cn/README.md) | **日本語** | [**한국어**](../ko/README.md) | [**Türkçe**](../tr/README.md)
@@ -195,17 +195,12 @@ python webui.py v1 <言語(オプション)>
#### パス自動補完のサポート
- 1.音声パスを入力する
-
- 2.音声を小さなチャンクに分割する
-
- 3.ノイズ除去(オプション)
-
- 4.ASR
-
- 5.ASR転写を校正する
-
- 6.次のタブに移動し、モデルを微調整する
+ 1. 音声パスを入力する
+ 2. 音声を小さなチャンクに分割する
+ 3. ノイズ除去(オプション)
+ 4. ASR
+ 5. ASR転写を校正する
+ 6. 次のタブに移動し、モデルを微調整する
### 推論WebUIを開く
@@ -249,7 +244,25 @@ V1環境からV2を使用するには:
中国語V2追加: [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(G2PWモデルをダウンロードし、解凍して`G2PWModel`にリネームし、`GPT_SoVITS/text`に配置します)
+## V3 リリースノート
+新機能:
+
+1. 音色の類似性が向上し、ターゲットスピーカーを近似するために必要な学習データが少なくなりました(音色の類似性は、ファインチューニングなしでベースモデルを直接使用することで顕著に改善されます)。
+
+2. GPTモデルがより安定し、繰り返しや省略が減少し、より豊かな感情表現を持つ音声の生成が容易になりました。
+
+ [詳細情報はこちら](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
+
+v2 環境から v3 を使用する方法:
+
+1. `pip install -r requirements.txt` を実行して、いくつかのパッケージを更新します。
+
+2. GitHubから最新のコードをクローンします。
+
+3. v3の事前学習済みモデル(s1v3.ckpt、s2Gv3.pth、models--nvidia--bigvgan_v2_24khz_100band_256x フォルダ)を[Huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) からダウンロードし、GPT_SoVITS\pretrained_models フォルダに配置します。
+
+ 追加: 音声超解像モデルについては、[ダウンロード方法](../../tools/AP_BWE_main/24kto48k/readme.txt)を参照してください。
## Todo リスト
@@ -276,10 +289,10 @@ V1環境からV2を使用するには:
```
python tools/uvr5/webui.py "<infer_device>" <is_half> <webui_port_uvr5>
```
-ブラウザを開けない場合は、以下の形式に従って UVR 処理を行ってください。これはオーディオ処理に mdxnet を使用しています。
-```
-python mdxnet.py --model --input_root --output_vocal --output_ins --agg_level --format --device --is_half_precision
-```
+
コマンド ラインを使用してデータセットのオーディオ セグメンテーションを行う方法は次のとおりです。
```
python audio_slicer.py \
@@ -287,7 +300,7 @@ python audio_slicer.py \
--output_root "" \
--threshold \
--min_length \
- --min_interval
+ --min_interval
--hop_size
```
コマンドラインを使用してデータセット ASR 処理を行う方法です (中国語のみ)
@@ -314,12 +327,18 @@ python ./tools/asr/fasterwhisper_asr.py -i -o