From 00f417ea068af55ccc6e586052ff99f31fd643cb Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Wed, 7 Aug 2024 18:49:55 +0800
Subject: [PATCH 1/6] Update models.py
---
GPT_SoVITS/module/models.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/GPT_SoVITS/module/models.py b/GPT_SoVITS/module/models.py
index 6bfee085..968c4cbf 100644
--- a/GPT_SoVITS/module/models.py
+++ b/GPT_SoVITS/module/models.py
@@ -1,3 +1,5 @@
+import warnings
+warnings.filterwarnings("ignore")
import copy
import math
import os
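Patch 1 silences every warning process-wide before the rest of `models.py` is imported. For context, `warnings.filterwarnings` also accepts narrower filters; a minimal sketch of scoping the suppression (the `module="torch"` filter is illustrative, not from the patch):

```python
import warnings

# Blanket suppression, as in the patch: hides every warning,
# including ones that may signal real problems.
warnings.filterwarnings("ignore")

# Narrower alternatives: silence one category, optionally only for
# warnings raised from a given module (matched as a regex).
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="torch")
```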
From 893b45246b47f029edae1dc963f524758603354a Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 8 Aug 2024 18:05:05 +0800
Subject: [PATCH 2/6] Update onnx_api.py
---
GPT_SoVITS/text/g2pw/onnx_api.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/GPT_SoVITS/text/g2pw/onnx_api.py b/GPT_SoVITS/text/g2pw/onnx_api.py
index 374c9a4e..32fc2c01 100644
--- a/GPT_SoVITS/text/g2pw/onnx_api.py
+++ b/GPT_SoVITS/text/g2pw/onnx_api.py
@@ -86,10 +86,10 @@ class G2PWOnnxConverter:
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
sess_options.intra_op_num_threads = 2
- self.session_g2pW = onnxruntime.InferenceSession(
- os.path.join(uncompress_path, 'g2pW.onnx'),
- sess_options=sess_options, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
- # sess_options=sess_options)
+        try:
+            self.session_g2pW = onnxruntime.InferenceSession(os.path.join(uncompress_path, 'g2pW.onnx'), sess_options=sess_options, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+        except Exception:
+            self.session_g2pW = onnxruntime.InferenceSession(os.path.join(uncompress_path, 'g2pW.onnx'), sess_options=sess_options, providers=['CPUExecutionProvider'])
self.config = load_config(
config_path=os.path.join(uncompress_path, 'config.py'),
use_default=True)
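Patch 2 makes G2PW session creation degrade gracefully: if building the CUDA-backed `InferenceSession` raises (no GPU, or an onnxruntime build without the CUDA provider), it retries on CPU. An equivalent, more explicit variant checks the available providers up front; a sketch under the same assumptions (the helper name is ours, not from the repo):

```python
import onnxruntime

def make_g2pw_session(model_path: str, sess_options=None):
    # Ask the installed onnxruntime build which providers it actually
    # exposes, instead of catching the failure after the fact.
    available = onnxruntime.get_available_providers()
    providers = ["CPUExecutionProvider"]
    if "CUDAExecutionProvider" in available:
        providers.insert(0, "CUDAExecutionProvider")
    return onnxruntime.InferenceSession(
        model_path, sess_options=sess_options, providers=providers)
```

The patch's try/except remains the more defensive choice: a provider can be listed as available yet still fail to initialize, for example under a broken CUDA driver.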
From 2310bcde5378930a1472570be0f54d766616b04b Mon Sep 17 00:00:00 2001
From: KamioRinn <63162909+KamioRinn@users.noreply.github.com>
Date: Sat, 10 Aug 2024 12:28:53 +0800
Subject: [PATCH 3/6] Optimize short sentence (#1430)
---
GPT_SoVITS/inference_webui.py | 7 +++++--
GPT_SoVITS/text/cleaner.py | 2 +-
api.py | 5 ++++-
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 727b9f7b..878f8d85 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -299,7 +299,7 @@ def get_first(text):
return text
from text import chinese
-def get_phones_and_bert(text,language,version):
+def get_phones_and_bert(text,language,version,final=False):
if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
language = language.replace("all_","")
if language == "en":
@@ -366,6 +366,9 @@ def get_phones_and_bert(text,language,version):
phones = sum(phones_list, [])
norm_text = ''.join(norm_text_list)
+ if not final and len(phones) < 6:
+ return get_phones_and_bert("." + text,language,version,final=True)
+
return phones,bert.to(dtype),norm_text
@@ -408,7 +411,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language,
if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
print(i18n("实际输入的参考文本:"), prompt_text)
text = text.strip("\n")
- if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
+ # if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text
print(i18n("实际输入的目标文本:"), text)
zero_wav = np.zeros(
diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py
index 1091a342..298e4d28 100644
--- a/GPT_SoVITS/text/cleaner.py
+++ b/GPT_SoVITS/text/cleaner.py
@@ -45,7 +45,7 @@ def clean_text(text, language, version=None):
elif language == "en":
phones = language_module.g2p(norm_text)
if len(phones) < 4:
- phones = [','] * (4 - len(phones)) + phones
+ phones = [','] + phones
word2ph = None
else:
phones = language_module.g2p(norm_text)
diff --git a/api.py b/api.py
index e510ab95..3b173948 100644
--- a/api.py
+++ b/api.py
@@ -275,7 +275,7 @@ def get_bert_inf(phones, word2ph, norm_text, language):
return bert
from text import chinese
-def get_phones_and_bert(text,language,version):
+def get_phones_and_bert(text,language,version,final=False):
if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
language = language.replace("all_","")
if language == "en":
@@ -340,6 +340,9 @@ def get_phones_and_bert(text,language,version):
phones = sum(phones_list, [])
norm_text = ''.join(norm_text_list)
+ if not final and len(phones) < 6:
+ return get_phones_and_bert("." + text,language,version,final=True)
+
return phones,bert.to(torch.float16 if is_half == True else torch.float32),norm_text
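Patch 3 replaces the old hard prefix rule in `get_tts_wav` with a retry-once guard inside `get_phones_and_bert`: when the whole input yields fewer than six phonemes, a period is prepended and the function calls itself exactly once more, with `final=True` blocking further recursion. The pattern in isolation, with a toy stand-in for the real text front end:

```python
def g2p(text: str) -> list[str]:
    # Toy stand-in for the real grapheme-to-phoneme front end:
    # one pseudo-phoneme per non-space character.
    return [ch for ch in text if not ch.isspace()]

def phonemize(text: str, final: bool = False) -> list[str]:
    phones = g2p(text)
    # Very short inputs synthesize poorly, so pad with a leading "."
    # and retry once; final=True caps the recursion depth at two.
    if not final and len(phones) < 6:
        return phonemize("." + text, final=True)
    return phones

print(phonemize("hi"))  # ['.', 'h', 'i'] after one padded retry
```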
From 62831dfcc7ec236cc767c78add8bb950f5444313 Mon Sep 17 00:00:00 2001
From: 蓝梦实 <36986837+SapphireLab@users.noreply.github.com>
Date: Mon, 12 Aug 2024 10:42:52 +0800
Subject: [PATCH 4/6] fix_onlyasr (#1433)
---
tools/asr/fasterwhisper_asr.py | 7 +--
tools/asr/funasr_asr.py | 100 +++++++++++++++++++--------------
2 files changed, 60 insertions(+), 47 deletions(-)
diff --git a/tools/asr/fasterwhisper_asr.py b/tools/asr/fasterwhisper_asr.py
index da8eadfb..d46cbbd7 100644
--- a/tools/asr/fasterwhisper_asr.py
+++ b/tools/asr/fasterwhisper_asr.py
@@ -68,10 +68,9 @@ def execute_asr(input_folder, output_folder, model_size, language, precision):
if info.language == "zh":
print("检测为中文文本, 转 FunASR 处理")
- if("only_asr"not in globals()):
- from tools.asr.funasr_asr import \
- only_asr # #如果用英文就不需要导入下载模型
- text = only_asr(file_path)
+ if("only_asr" not in globals()):
+            from tools.asr.funasr_asr import only_asr  # if the audio is English there is no need to import or download the FunASR model
+ text = only_asr(file_path, language=info.language.lower())
if text == '':
for segment in segments:
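The hunk above also changes how `only_asr` arrives: instead of importing it at module load, `execute_asr` imports it when the first Chinese file appears and relies on the `globals()` check to skip the import afterwards. A compact sketch of that lazy, import-once idiom (function and module names follow the patch; the body is simplified):

```python
def transcribe_zh(file_path: str) -> str:
    # Import (and therefore download) the FunASR pipeline only when a
    # Chinese file actually shows up; English-only runs never pay for it.
    if "only_asr" not in globals():
        global only_asr
        from tools.asr.funasr_asr import only_asr
    return only_asr(file_path, language="zh")
```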
diff --git a/tools/asr/funasr_asr.py b/tools/asr/funasr_asr.py
index 11209ada..fe520e24 100644
--- a/tools/asr/funasr_asr.py
+++ b/tools/asr/funasr_asr.py
@@ -3,30 +3,72 @@
import argparse
import os
import traceback
-from tqdm import tqdm
+
# from funasr.utils import version_checker
# version_checker.check_for_update = lambda: None
from funasr import AutoModel
+from tqdm import tqdm
+funasr_models = {}  # cache loaded models so each language is loaded only once
-def only_asr(input_file):
+def only_asr(input_file, language):
try:
+ model = create_model(language)
text = model.generate(input=input_file)[0]["text"]
except:
text = ''
print(traceback.format_exc())
return text
+def create_model(language="zh"):
+ path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
+ path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch'
+ path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
+ path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
+ vad_model_revision = punc_model_revision = "v2.0.4"
+
+ if language == "zh":
+ path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
+ path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
+ model_revision = "v2.0.4"
+ elif language == "yue":
+ path_asr = 'tools/asr/models/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online'
+ path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online"
+ model_revision = "master"
+ path_vad = path_punc = None
+ vad_model_revision = punc_model_revision = None
+        ### Note: Cantonese recognition with VAD may occasionally raise shape-mismatch errors, but it works without VAD; without VAD, punctuation can only be added separately in a later pass, and the punctuation model handles Cantonese poorly anyway...
+ else:
+        raise ValueError("FunASR does not support this language: " + language)
+
+ if language in funasr_models:
+ return funasr_models[language]
+ else:
+ model = AutoModel(
+ model = path_asr,
+ model_revision = model_revision,
+ vad_model = path_vad,
+ vad_model_revision = vad_model_revision,
+ punc_model = path_punc,
+ punc_model_revision = punc_model_revision,
+ )
+        print(f"FunASR model loaded: {language.upper()}")
+
+ funasr_models[language] = model
+ return model
+
def execute_asr(input_folder, output_folder, model_size, language):
input_file_names = os.listdir(input_folder)
input_file_names.sort()
output = []
output_file_name = os.path.basename(input_folder)
+
+ model = create_model(language)
for file_name in tqdm(input_file_names):
try:
- print(file_name)
+ print("\n" + file_name)
file_path = os.path.join(input_folder, file_name)
text = model.generate(input=file_path)[0]["text"]
output.append(f"{file_path}|{output_file_name}|{language.upper()}|{text}")
@@ -42,47 +84,19 @@ def execute_asr(input_folder, output_folder, model_size, language):
print(f"ASR 任务完成->标注文件路径: {output_file_path}\n")
return output_file_path
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-i", "--input_folder", type=str, required=True,
- help="Path to the folder containing WAV files.")
-parser.add_argument("-o", "--output_folder", type=str, required=True,
- help="Output folder to store transcriptions.")
-parser.add_argument("-s", "--model_size", type=str, default='large',
- help="Model Size of FunASR is Large")
-parser.add_argument("-l", "--language", type=str, default='zh', choices=['zh','yue','auto'],
- help="Language of the audio files.")
-parser.add_argument("-p", "--precision", type=str, default='float16', choices=['float16','float32'],
- help="fp16 or fp32")#还没接入
-
-cmd = parser.parse_args()
-
-path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
-path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch'
-path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
-path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
-vad_model_revision=punc_model_revision="v2.0.4"
-
-if(cmd.language=="zh"):
- path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
- path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
- model_revision="v2.0.4"
-else:
- path_asr = 'tools/asr/models/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online'
- path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online"
- model_revision="master"
- path_vad=path_punc=vad_model_revision=punc_model_revision=None###友情提示:粤语带VAD识别可能会有少量shape不对报错的,但是不带VAD可以.不带vad只能分阶段单独加标点。不过标点模型对粤语效果真的不行…
-
-model = AutoModel(
- model=path_asr,
- model_revision=model_revision,
- vad_model=path_vad,
- vad_model_revision=vad_model_revision,
- punc_model=path_punc,
- punc_model_revision=punc_model_revision,
-)
-
if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-i", "--input_folder", type=str, required=True,
+ help="Path to the folder containing WAV files.")
+ parser.add_argument("-o", "--output_folder", type=str, required=True,
+ help="Output folder to store transcriptions.")
+ parser.add_argument("-s", "--model_size", type=str, default='large',
+ help="Model Size of FunASR is Large")
+ parser.add_argument("-l", "--language", type=str, default='zh', choices=['zh','yue','auto'],
+ help="Language of the audio files.")
+ parser.add_argument("-p", "--precision", type=str, default='float16', choices=['float16','float32'],
+                        help="fp16 or fp32")  # not wired up yet
+ cmd = parser.parse_args()
execute_asr(
input_folder = cmd.input_folder,
output_folder = cmd.output_folder,
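The rewritten `funasr_asr.py` turns model construction into a memoized factory: `funasr_models` maps a language code to an already-built `AutoModel`, so `only_asr` and `execute_asr` can both call `create_model` freely. The caching skeleton on its own, with the expensive constructor stubbed out (the stub is ours):

```python
funasr_models = {}  # language code -> loaded model, one per process

def load_model_for(language: str):
    # Stub for the real AutoModel(model=..., vad_model=..., ...) call.
    return f"model<{language}>"

def create_model(language: str = "zh"):
    if language not in ("zh", "yue"):
        raise ValueError("FunASR does not support this language: " + language)
    # Build each language's model at most once; later calls hit the cache.
    if language not in funasr_models:
        funasr_models[language] = load_model_for(language)
    return funasr_models[language]

m1 = create_model("zh")
m2 = create_model("zh")
assert m1 is m2  # the second call returned the cached instance
```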
From d552c971bfe25e85fe0da6d8cf661d1051cdee6c Mon Sep 17 00:00:00 2001
From: AkitoLiu <39857739+Akito-UzukiP@users.noreply.github.com>
Date: Mon, 12 Aug 2024 10:43:36 +0800
Subject: [PATCH 5/6] add modifiable japanese dict (#1443)
---
GPT_SoVITS/text/ja_userdic/userdict.csv | 1 +
GPT_SoVITS/text/japanese.py | 24 +++++++++++++++++++++++-
2 files changed, 24 insertions(+), 1 deletion(-)
create mode 100644 GPT_SoVITS/text/ja_userdic/userdict.csv
diff --git a/GPT_SoVITS/text/ja_userdic/userdict.csv b/GPT_SoVITS/text/ja_userdic/userdict.csv
new file mode 100644
index 00000000..b23e0d63
--- /dev/null
+++ b/GPT_SoVITS/text/ja_userdic/userdict.csv
@@ -0,0 +1 @@
+主殿,*,*,-32767,名詞,固有名詞,一般,*,*,*,アルジドノ,アルジドノ,アルジドノ,3/5,*
\ No newline at end of file
diff --git a/GPT_SoVITS/text/japanese.py b/GPT_SoVITS/text/japanese.py
index 4c10720e..e9fe7c1a 100644
--- a/GPT_SoVITS/text/japanese.py
+++ b/GPT_SoVITS/text/japanese.py
@@ -1,8 +1,30 @@
# modified from https://github.com/CjangCjengh/vits/blob/main/text/japanese.py
import re
-import sys
import pyopenjtalk
+import os
+import hashlib
+current_file_path = os.path.dirname(__file__)
+def get_hash(fp: str) -> str:
+ hash_md5 = hashlib.md5()
+ with open(fp, "rb") as f:
+ for chunk in iter(lambda: f.read(4096), b""):
+ hash_md5.update(chunk)
+ return hash_md5.hexdigest()
+
+USERDIC_CSV_PATH = os.path.join(current_file_path, "ja_userdic", "userdict.csv")
+USERDIC_BIN_PATH = os.path.join(current_file_path, "ja_userdic", "user.dict")
+USERDIC_HASH_PATH = os.path.join(current_file_path, "ja_userdic", "userdict.md5")
+# If the compiled user dictionary does not exist, build it; if it does, check the MD5 and rebuild when it differs
+if os.path.exists(USERDIC_CSV_PATH):
+    if not os.path.exists(USERDIC_BIN_PATH) or not os.path.exists(USERDIC_HASH_PATH) or get_hash(USERDIC_CSV_PATH) != open(USERDIC_HASH_PATH, "r", encoding="utf-8").read():
+ pyopenjtalk.mecab_dict_index(USERDIC_CSV_PATH, USERDIC_BIN_PATH)
+ with open(USERDIC_HASH_PATH, "w", encoding='utf-8') as f:
+ f.write(get_hash(USERDIC_CSV_PATH))
+
+if os.path.exists(USERDIC_BIN_PATH):
+ pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
+
from text.symbols import punctuation
# Regular expression matching Japanese without punctuation marks:
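Patch 5 keeps the compiled `user.dict` in sync with `userdict.csv` by stamping the CSV's MD5 next to the binary and recompiling whenever the digest changes. The same stamp-file pattern as a generic helper (the `compile_fn` callback stands in for `pyopenjtalk.mecab_dict_index`; the helper itself is our sketch):

```python
import hashlib
import os

def md5_of(path: str) -> str:
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            h.update(chunk)
    return h.hexdigest()

def rebuild_if_stale(src: str, out: str, stamp: str, compile_fn) -> None:
    # Recompile src -> out when out is missing, the stamp is missing,
    # or the recorded digest no longer matches the current source.
    digest = md5_of(src)
    recorded = None
    if os.path.exists(stamp):
        with open(stamp, "r", encoding="utf-8") as f:
            recorded = f.read()
    if not os.path.exists(out) or recorded != digest:
        compile_fn(src, out)
        with open(stamp, "w", encoding="utf-8") as f:
            f.write(digest)
```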
From c60e796452650f15d1b8b45cc428226597397286 Mon Sep 17 00:00:00 2001
From: Lion-Wu <130235128+Lion-Wu@users.noreply.github.com>
Date: Mon, 12 Aug 2024 10:47:02 +0800
Subject: [PATCH 6/6] Update README (#1423)
---
README.md | 48 +++++++------------
docs/cn/README.md | 62 ++++++++++--------------
docs/ja/README.md | 119 ++++++++++++++++++++++++++++++++++++++-------
docs/ko/README.md | 120 +++++++++++++++++++++++++++++++++++++++-------
docs/tr/README.md | 116 +++++++++++++++++++++++++++++++++++++-------
5 files changed, 348 insertions(+), 117 deletions(-)
diff --git a/README.md b/README.md
index 98fc0d0a..91f15703 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-
## Installation
-For users in the China region, you can [click here](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official) to use AutoDL Cloud Docker to experience the full functionality online.
+Users in China can [click here](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official) to use AutoDL Cloud Docker to experience the full functionality online.
### Tested Environments
@@ -53,7 +53,7 @@ _Note: numba==0.56.4 requires py<3.11_
If you are a Windows user (tested with win>=10), you can [download the integrated package](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true) and double-click on _go-webui.bat_ to start GPT-SoVITS-WebUI.
-Users in the China region can [download the package](https://www.icloud.com.cn/iclouddrive/030K8WjGJ9xMXhpzJVIMEWPzQ#GPT-SoVITS-beta0706fix1) by clicking the link and then selecting "Download a copy." (Log out if you encounter errors while downloading.)
+**Users in China can [download the package here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#KTvnO).**
### Linux
@@ -141,31 +141,17 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
## Pretrained Models
-Download pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS/pretrained_models`.
+**Users in China can [download all these models here](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#nVNhX).**
-Download G2PW models from [G2PWModel-v2-onnx.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip), unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS\text`.(Chinese TTS Only)
+1. Download pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS/pretrained_models`.
-For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
+2. Download G2PW models from [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip), unzip it, rename the folder to `G2PWModel`, and place it in `GPT_SoVITS/text`. (Chinese TTS only)
-Users in the China region can download these two models by entering the links below and clicking "Download a copy" (Log out if you encounter errors while downloading.)
+3. For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
-- [GPT-SoVITS Models](https://www.icloud.com/iclouddrive/044boFMiOHHt22SNr-c-tirbA#pretrained_models)
+4. For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
-- [UVR5 Weights](https://www.icloud.com.cn/iclouddrive/0bekRKDiJXboFhbfm3lM2fVbA#UVR5_Weights)
-
-- [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS\text`.
-
-For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
-
-Or Download FunASR Model from [FunASR Model](https://www.icloud.com/iclouddrive/0b52_7SQWYr75kHkPoPXgpeQA#models), unzip and replace `tools/asr/models`.(Log out if you encounter errors while downloading.)
-
-For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may have the similar effect with smaller disk footprint.
-
-Users in the China region can download this model by entering the links below
-
-- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/00bUEp9_mcjMq_dhHu_vrAFDQ#faster-whisper-large-v3) (Click "Download a copy", log out if you encounter errors while downloading.)
-
-- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3) (HuggingFace mirror site)
+5. For English or Japanese ASR (additionally), download models from [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) and place them in `tools/asr/models`. Also, [other models](https://huggingface.co/Systran) may offer a similar effect with a smaller disk footprint.
## Dataset Format
@@ -249,25 +235,25 @@ then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
New Features:
- 1.Support Korean and Cantonese
+1. Support Korean and Cantonese
- 2.An optimized text frontend
+2. An optimized text frontend
- 3.Pre-trained model extended from 2k hours to 5k hours
+3. Pre-trained model extended from 2k hours to 5k hours
- 4.Improved synthesis quality for low-quality reference audio
+4. Improved synthesis quality for low-quality reference audio
- [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) )
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7) )
Use v2 from v1 environment:
- 1.pip install -r requirements.txt to update some packages
+1. Run `pip install -r requirements.txt` to update some packages.
- 2.clone the latest codes from github
+2. Clone the latest code from GitHub.
- 3.download v2 pretrained models from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) and put them into GPT_SoVITS\pretrained_models\gsv-v2final-pretrained
+3. Download v2 pretrained models from [Hugging Face](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) and put them into `GPT_SoVITS/pretrained_models/gsv-v2final-pretrained`.
- Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS\text`.
+   Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip) (download the G2PW models, unzip and rename the folder to `G2PWModel`, then place it in `GPT_SoVITS/text`).
## Todo List
diff --git a/docs/cn/README.md b/docs/cn/README.md
index ea67f9dc..8c86b48a 100644
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@@ -38,7 +38,7 @@ https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-
## 安装
-中国地区用户可[点击此处](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official)使用 AutoDL 云端镜像进行体验。
+中国地区的用户可[点击此处](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official)使用 AutoDL 云端镜像进行体验。
### 测试通过的环境
@@ -53,7 +53,7 @@ _注: numba==0.56.4 需要 python<3.11_
如果你是 Windows 用户(已在 win>=10 上测试),可以下载[下载整合包](https://huggingface.co/lj1995/GPT-SoVITS-windows-package/resolve/main/GPT-SoVITS-beta.7z?download=true),解压后双击 go-webui.bat 即可启动 GPT-SoVITS-WebUI。
-中国地区用户可以通过点击链接并选择“下载副本”[下载整合包](https://www.icloud.com.cn/iclouddrive/030K8WjGJ9xMXhpzJVIMEWPzQ#GPT-SoVITS-beta0706fix1)。(如果下载时遇到错误,请退出登录)
+**中国地区的用户可以[在此处下载整合包](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#KTvnO)。**
### Linux
@@ -141,31 +141,17 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
## 预训练模型
-从 [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) 下载预训练模型,并将它们放置在 `GPT_SoVITS\pretrained_models` 中。
+**中国地区的用户可以[在此处下载这些模型](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e/dkxgpiy9zb96hob4#nVNhX)。**
-从 [G2PWModel-v2-onnx.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip) 下载G2PW模型,并将它们解压重命名为`G2PWModel` 后放置在 `GPT_SoVITS\text` 中。(仅限中文TTS)
+1. 从 [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) 下载预训练模型,并将其放置在 `GPT_SoVITS/pretrained_models` 目录中。
-对于 UVR5(人声/伴奏分离和混响移除,附加),从 [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) 下载模型,并将它们放置在 `tools/uvr5/uvr5_weights` 中。
+2. 从 [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip) 下载模型,解压并重命名为 `G2PWModel`,然后将其放置在 `GPT_SoVITS/text` 目录中。(仅限中文TTS)
-中国地区用户可以进入以下链接并点击“下载副本”下载以上两个模型(如果下载时遇到错误,请退出登录):
+3. 对于 UVR5(人声/伴奏分离和混响移除,额外功能),从 [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) 下载模型,并将其放置在 `tools/uvr5/uvr5_weights` 目录中。
-- [GPT-SoVITS Models](https://www.icloud.com/iclouddrive/044boFMiOHHt22SNr-c-tirbA#pretrained_models)
-
-- [UVR5 Weights](https://www.icloud.com.cn/iclouddrive/0bekRKDiJXboFhbfm3lM2fVbA#UVR5_Weights)
-
-- [G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(下载G2PW模型,并将它们解压重命名为 `G2PWModel` 后放置在 `GPT_SoVITS\text` 中)
-
-对于中文自动语音识别(附加),从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 中。
-
-或者从[FunASR模型链接](https://www.icloud.com/iclouddrive/0b52_7SQWYr75kHkPoPXgpeQA#models)下载模型,并将它们解压后替换 `tools/asr/models` 。(点击“下载副本”,如果下载时遇到错误,请退出登录)
-
-对于英语与日语自动语音识别(附加),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将它们放置在 `tools/asr/models` 中。 此外,[其他模型](https://huggingface.co/Systran)可能具有类似效果,但占用更小的磁盘空间。
-
-中国地区用户可以通过以下链接下载:
-- [Faster Whisper Large V3](https://www.icloud.com/iclouddrive/00bUEp9_mcjMq_dhHu_vrAFDQ#faster-whisper-large-v3)(点击“下载副本”,如果下载时遇到错误,请退出登录)
-
-- [Faster Whisper Large V3](https://hf-mirror.com/Systran/faster-whisper-large-v3)(Hugging Face镜像站)
+4. 对于中文 ASR(额外功能),从 [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files)、[Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files) 和 [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) 下载模型,并将它们放置在 `tools/asr/models` 目录中。
+5. 对于英语或日语 ASR(额外功能),从 [Faster Whisper Large V3](https://huggingface.co/Systran/faster-whisper-large-v3) 下载模型,并将其放置在 `tools/asr/models` 目录中。此外,[其他模型](https://huggingface.co/Systran) 可能具有类似效果且占用更少的磁盘空间。
## 数据集格式
@@ -249,44 +235,44 @@ python webui.py
新特性:
- 1.支持韩语及粤语
+1. 支持韩语及粤语
- 2.更好的文本前端
+2. 更好的文本前端
- 3.底模由2k小时扩展至5k小时
+3. 底模由2k小时扩展至5k小时
- 4.对低音质参考音频(尤其是来源于网络的高频严重缺失、听着很闷的音频)合成出来音质更好
+4. 对低音质参考音频(尤其是来源于网络的高频严重缺失、听着很闷的音频)合成出来音质更好
- 详见[wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
+ 详见[wiki](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
从v1环境迁移至v2
- 1.需要pip安装requirements.txt更新环境
+1. 需要pip安装requirements.txt更新环境
- 2.需要克隆github上的最新代码
+2. 需要克隆github上的最新代码
- 3.需要从[huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) 下载预训练模型文件放到GPT_SoVITS\pretrained_models\gsv-v2final-pretrained下
+3. 需要从[huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) 下载预训练模型文件放到GPT_SoVITS/pretrained_models/gsv-v2final-pretrained下
- 中文额外需要下载[G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(下载G2PW模型,解压并重命名为`G2PWModel`,将其放到`GPT_SoVITS\text`目录下
+ 中文额外需要下载[G2PWModel_1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(下载G2PW模型,解压并重命名为`G2PWModel`,将其放到`GPT_SoVITS/text`目录下)
## 待办事项清单
-- [ ] **高优先级:**
+- [x] **高优先级:**
- [x] 日语和英语的本地化。
- [x] 用户指南。
- [x] 日语和英语数据集微调训练。
- [ ] **功能:**
- - [ ] 零样本声音转换(5 秒)/ 少样本声音转换(1 分钟)。
- - [ ] TTS 语速控制。
- - [ ] 增强的 TTS 情感控制。
+ - [x] 零样本声音转换(5 秒)/ 少样本声音转换(1 分钟)。
+ - [x] TTS 语速控制。
+ - [ ] ~~增强的 TTS 情感控制。~~
- [ ] 尝试将 SoVITS 令牌输入更改为词汇的概率分布。
- - [ ] 改进英语和日语文本前端。
+ - [x] 改进英语和日语文本前端。
- [ ] 开发体积小和更大的 TTS 模型。
- [x] Colab 脚本。
- [ ] 扩展训练数据集(从 2k 小时到 10k 小时)。
- - [ ] 更好的 sovits 基础模型(增强的音频质量)。
+ - [x] 更好的 sovits 基础模型(增强的音频质量)。
- [ ] 模型混合。
## (附加)命令行运行方式
@@ -350,6 +336,8 @@ python ./tools/asr/fasterwhisper_asr.py -i -o