mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-10-08 07:49:59 +08:00
Merge d5d4906bd3a4703b234fd4e09d76e9dd7922e0d1 into 7a112b804a06a7e6846a8ca3d8a178a493ebdc77
This commit is contained in:
commit
00d7f39eea
5
.gitignore
vendored
5
.gitignore
vendored
@ -10,6 +10,11 @@ reference
|
|||||||
GPT_weights
|
GPT_weights
|
||||||
SoVITS_weights
|
SoVITS_weights
|
||||||
TEMP
|
TEMP
|
||||||
|
PortableGit
|
||||||
|
|
||||||
|
|
||||||
ffmpeg.exe
|
ffmpeg.exe
|
||||||
ffprobe.exe
|
ffprobe.exe
|
||||||
|
*.bat
|
||||||
|
PortableGit/
|
||||||
|
|
||||||
|
@ -1,7 +1,3 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
from tools.i18n.i18n import I18nAuto
|
from tools.i18n.i18n import I18nAuto
|
||||||
@ -24,6 +20,8 @@ def register_method(name):
|
|||||||
|
|
||||||
splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", }
|
splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def split_big_text(text, max_len=510):
|
def split_big_text(text, max_len=510):
|
||||||
# 定义全角和半角标点符号
|
# 定义全角和半角标点符号
|
||||||
punctuation = "".join(splits)
|
punctuation = "".join(splits)
|
||||||
@ -49,8 +47,6 @@ def split_big_text(text, max_len=510):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def split(todo_text):
|
def split(todo_text):
|
||||||
todo_text = todo_text.replace("……", "。").replace("——", ",")
|
todo_text = todo_text.replace("……", "。").replace("——", ",")
|
||||||
if todo_text[-1] not in splits:
|
if todo_text[-1] not in splits:
|
||||||
@ -69,6 +65,20 @@ def split(todo_text):
|
|||||||
i_split_head += 1
|
i_split_head += 1
|
||||||
return todo_texts
|
return todo_texts
|
||||||
|
|
||||||
|
# contributed by XTer
|
||||||
|
# 简单的按长度切分,不希望出现超长的句子
|
||||||
|
def split_long_sentence(text, max_length=510):
    """Hard-wrap every line of *text* to at most *max_length* characters.

    The text is split on newlines and each line is cut into consecutive
    chunks of at most ``max_length`` characters. The cut is purely
    length-based; punctuation is not taken into account. Empty lines
    produce no chunk and therefore disappear from the result.

    Args:
        text: Input text, possibly containing newlines.
        max_length: Maximum number of characters per output line. Must be
            at least 1.

    Returns:
        The resulting chunks joined with ``"\\n"``.

    Raises:
        ValueError: If ``max_length`` is less than 1. A non-positive chunk
            size can never shorten the remaining line, so the split would
            not terminate.
    """
    if max_length < 1:
        raise ValueError(
            f"max_length must be a positive integer, got {max_length!r}"
        )

    chunks = []
    for line in text.split("\n"):
        # An empty line yields an empty range, so it contributes nothing.
        chunks.extend(
            line[start:start + max_length]
            for start in range(0, len(line), max_length)
        )
    return "\n".join(chunks)
|
||||||
|
|
||||||
# 不切
|
# 不切
|
||||||
@register_method("cut0")
|
@register_method("cut0")
|
||||||
@ -79,7 +89,7 @@ def cut0(inp):
|
|||||||
# 凑四句一切
|
# 凑四句一切
|
||||||
@register_method("cut1")
|
@register_method("cut1")
|
||||||
def cut1(inp):
|
def cut1(inp):
|
||||||
inp = inp.strip("\n")
|
inp = split_long_sentence(inp).strip("\n")
|
||||||
inps = split(inp)
|
inps = split(inp)
|
||||||
split_idx = list(range(0, len(inps), 4))
|
split_idx = list(range(0, len(inps), 4))
|
||||||
split_idx[-1] = None
|
split_idx[-1] = None
|
||||||
@ -91,10 +101,11 @@ def cut1(inp):
|
|||||||
opts = [inp]
|
opts = [inp]
|
||||||
return "\n".join(opts)
|
return "\n".join(opts)
|
||||||
|
|
||||||
|
|
||||||
# 凑50字一切
|
# 凑50字一切
|
||||||
@register_method("cut2")
|
@register_method("cut2")
|
||||||
def cut2(inp):
|
def cut2(inp, max_length=50):
|
||||||
inp = inp.strip("\n")
|
inp = split_long_sentence(inp).strip("\n")
|
||||||
inps = split(inp)
|
inps = split(inp)
|
||||||
if len(inps) < 2:
|
if len(inps) < 2:
|
||||||
return inp
|
return inp
|
||||||
@ -104,7 +115,7 @@ def cut2(inp):
|
|||||||
for i in range(len(inps)):
|
for i in range(len(inps)):
|
||||||
summ += len(inps[i])
|
summ += len(inps[i])
|
||||||
tmp_str += inps[i]
|
tmp_str += inps[i]
|
||||||
if summ > 50:
|
if summ > max_length:
|
||||||
summ = 0
|
summ = 0
|
||||||
opts.append(tmp_str)
|
opts.append(tmp_str)
|
||||||
tmp_str = ""
|
tmp_str = ""
|
||||||
@ -116,16 +127,18 @@ def cut2(inp):
|
|||||||
opts = opts[:-1]
|
opts = opts[:-1]
|
||||||
return "\n".join(opts)
|
return "\n".join(opts)
|
||||||
|
|
||||||
|
|
||||||
# 按中文句号。切
|
# 按中文句号。切
|
||||||
@register_method("cut3")
|
@register_method("cut3")
|
||||||
def cut3(inp):
|
def cut3(inp):
|
||||||
inp = inp.strip("\n")
|
inp = split_long_sentence(inp).strip("\n")
|
||||||
return "\n".join(["%s" % item for item in inp.strip("。").split("。")])
|
return "\n".join(["%s" % item for item in inp.strip("。").split("。")])
|
||||||
|
|
||||||
#按英文句号.切
|
|
||||||
|
# 按英文句号.切
|
||||||
@register_method("cut4")
|
@register_method("cut4")
|
||||||
def cut4(inp):
|
def cut4(inp):
|
||||||
inp = inp.strip("\n")
|
inp = split_long_sentence(inp).strip("\n")
|
||||||
return "\n".join(["%s" % item for item in inp.strip(".").split(".")])
|
return "\n".join(["%s" % item for item in inp.strip(".").split(".")])
|
||||||
|
|
||||||
# 按标点符号切
|
# 按标点符号切
|
||||||
@ -134,7 +147,7 @@ def cut4(inp):
|
|||||||
def cut5(inp):
|
def cut5(inp):
|
||||||
# if not re.search(r'[^\w\s]', inp[-1]):
|
# if not re.search(r'[^\w\s]', inp[-1]):
|
||||||
# inp += '。'
|
# inp += '。'
|
||||||
inp = inp.strip("\n")
|
inp = split_long_sentence(inp).strip("\n")
|
||||||
punds = r'[,.;?!、,。?!;:…]'
|
punds = r'[,.;?!、,。?!;:…]'
|
||||||
items = re.split(f'({punds})', inp)
|
items = re.split(f'({punds})', inp)
|
||||||
mergeitems = ["".join(group) for group in zip(items[::2], items[1::2])]
|
mergeitems = ["".join(group) for group in zip(items[::2], items[1::2])]
|
||||||
@ -144,9 +157,67 @@ def cut5(inp):
|
|||||||
opt = "\n".join(mergeitems)
|
opt = "\n".join(mergeitems)
|
||||||
return opt
|
return opt
|
||||||
|
|
||||||
|
# contributed by https://github.com/X-T-E-R/GPT-SoVITS-Inference/blob/main/GPT_SoVITS/TTS_infer_pack/text_segmentation_method.py
|
||||||
|
@register_method("auto_cut")
def auto_cut(inp, max_length=60):
    """Split text at sentence punctuation, then pack clauses into lines.

    Steps:
      1. Strip leading/trailing newlines and delete quote and bracket
         characters.
      2. Split at sentence-ending punctuation, keeping each mark attached
         to the sentence it terminates.
      3. Inside each sentence, split at comma-like separators and greedily
         re-join the fragments into lines of at most ``max_length``
         characters. A fragment that is itself longer than ``max_length``
         is first hard-split with ``split_long_sentence``.
      4. Drop blank lines and lines that consist of a single punctuation
         mark.

    Args:
        inp: Text to segment.
        max_length: Maximum number of characters packed into one line.

    Returns:
        The segments joined with ``"\\n"``. An empty string is returned
        when nothing is left after step 1.
    """
    cleaned = inp.strip("\n")
    erase_punds = r'[“”"‘’\'()()【】[\]{}<>《》〈〉〔〕〖〗〘〙〚〛〛〞〟]'
    cleaned = re.sub(erase_punds, '', cleaned)
    if not cleaned:
        # Nothing to segment; indexing cleaned[-1] below would raise IndexError.
        return ""

    split_punds = r'[?!。?!~:]'
    # Substring test against the pattern text itself. The pattern's own
    # square brackets cannot be the last character here because they were
    # erased above.
    if cleaned[-1] not in split_punds:
        # Guarantee a terminator so the pairing below keeps the final sentence.
        cleaned += "。"

    # re.split with a capture group yields [text, mark, text, mark, ..., tail];
    # zipping even and odd positions glues every mark to its sentence.
    pieces = re.split(f'({split_punds})', cleaned)
    sentences = ["".join(pair) for pair in zip(pieces[::2], pieces[1::2])]

    separators = [',', ',', '、', '——', '…']
    # The capture group keeps the separators in the split result.
    clause_pattern = re.compile('(' + '|'.join(map(re.escape, separators)) + ')')

    def process_commas(sentence, max_length):
        """Pack the comma-separated fragments of one sentence into lines."""
        fragments = []
        for fragment in clause_pattern.split(sentence):
            if len(fragment) > max_length:
                fragments.extend(
                    split_long_sentence(fragment, max_length=max_length).split("\n")
                )
            else:
                fragments.append(fragment)

        lines = []
        current_line = ""
        for fragment in fragments:
            if len(current_line) + len(fragment) <= max_length:
                # Still fits: keep filling the current line.
                current_line += fragment
            else:
                # Does not fit: flush the current line and start a new one.
                lines.append(current_line.strip())
                # NOTE: the trailing space is kept from the original
                # implementation. It is stripped if the line is flushed
                # as-is, but otherwise ends up between this fragment and
                # the next one appended to the same line.
                current_line = fragment + " "
        lines.append(current_line.strip())
        return "\n".join(lines)

    candidates = []
    for sentence in sentences:
        # Re-split on "\n" because fragments may carry newlines of their own.
        candidates.extend(process_commas(sentence, max_length=max_length).split("\n"))

    lone_marks = "?!,,。?!~:"
    final_items = []
    for candidate in candidates:
        core = candidate.strip()
        if not core:
            continue
        if len(core) == 1 and core in lone_marks:
            # A line holding only one punctuation mark carries no text.
            continue
        final_items.append(candidate)

    return "\n".join(final_items)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
method = get_method("cut5")
|
method = get_method("cut0")
|
||||||
print(method("你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。"))
|
str1="""一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十
|
||||||
|
"""
|
||||||
|
print("|\n|".join(method(str1).split("\n")))
|
||||||
|
@ -66,6 +66,7 @@ cut_method = {
|
|||||||
i18n("按中文句号。切"): "cut3",
|
i18n("按中文句号。切"): "cut3",
|
||||||
i18n("按英文句号.切"): "cut4",
|
i18n("按英文句号.切"): "cut4",
|
||||||
i18n("按标点符号切"): "cut5",
|
i18n("按标点符号切"): "cut5",
|
||||||
|
i18n("基于标点凑字切"): "auto_cut",
|
||||||
}
|
}
|
||||||
|
|
||||||
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
|
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
|
||||||
|
@ -23,5 +23,5 @@ PyYAML
|
|||||||
psutil
|
psutil
|
||||||
jieba_fast
|
jieba_fast
|
||||||
jieba
|
jieba
|
||||||
LangSegment>=0.2.0
|
LangSegment>=0.2.5
|
||||||
Faster_Whisper
|
Faster_Whisper
|
Loading…
x
Reference in New Issue
Block a user