mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
parent
a3da8e87b5
commit
277b258360
@ -8,6 +8,7 @@ sys.path.append(now_dir)
|
||||
import re
|
||||
import torch
|
||||
import LangSegment
|
||||
|
||||
from typing import Dict, List, Tuple
|
||||
from text.cleaner import clean_text
|
||||
from text import cleaned_text_to_sequence
|
||||
@ -17,6 +18,7 @@ from TTS_infer_pack.text_segmentation_method import split_big_text, splits, get_
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
|
||||
i18n = I18nAuto()
|
||||
punctuation = set(['!', '?', '…', ',', '.', '-'," "])
|
||||
|
||||
def get_first(text:str) -> str:
|
||||
pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]"
|
||||
@ -54,6 +56,7 @@ class TextPreprocessor:
|
||||
|
||||
def preprocess(self, text:str, lang:str, text_split_method:str)->List[Dict]:
|
||||
print(i18n("############ 切分文本 ############"))
|
||||
texts = self.replace_consecutive_punctuation(texts)
|
||||
texts = self.pre_seg_text(text, lang, text_split_method)
|
||||
result = []
|
||||
print(i18n("############ 提取文本Bert特征 ############"))
|
||||
@ -83,6 +86,7 @@ class TextPreprocessor:
|
||||
text = text.replace("\n\n", "\n")
|
||||
|
||||
_texts = text.split("\n")
|
||||
_texts = self.process_text(_texts)
|
||||
_texts = merge_short_text_in_array(_texts, 5)
|
||||
texts = []
|
||||
|
||||
@ -205,6 +209,23 @@ class TextPreprocessor:
|
||||
|
||||
return feature
|
||||
|
||||
def process_text(self, texts):
    """Drop blank entries from a list of text segments.

    Args:
        texts: Re-iterable collection of segments (it is traversed twice);
            entries may be ``None``.

    Returns:
        A new list holding, in their original order, every entry that is
        not ``None``, the empty string, or a single space.

    Raises:
        ValueError: If every entry is ``None``, empty, a single space or a
            lone newline. An empty ``texts`` also raises, because ``all()``
            over nothing is true.
    """
    if all(text in (None, " ", "\n", "") for text in texts):
        raise ValueError(i18n("请输入有效文本"))
    # A lone newline counts as blank for the check above but is deliberately
    # not filtered here, matching the long-standing behaviour of this method.
    return [text for text in texts if text not in (None, " ", "")]
|
||||
|
||||
|
||||
def replace_consecutive_punctuation(self, text):
    """Collapse each run of consecutive punctuation marks to its first mark.

    The marks are the members of the module-level ``punctuation`` set, which
    includes the space character, so a mark followed by spaces is reduced to
    the mark alone.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every run of two or more punctuation characters
        replaced by the first character of that run.
    """
    # Character class matching any single punctuation mark.
    char_class = "[" + "".join(map(re.escape, punctuation)) + "]"
    # Group 1 captures the first mark of a run; the repeated second group
    # swallows the rest of the run.
    run_pattern = "(" + char_class + ")(" + char_class + ")+"
    return re.sub(run_pattern, r'\1', text)
|
||||
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@ from typing import Callable
|
||||
from tools.i18n.i18n import I18nAuto
|
||||
|
||||
i18n = I18nAuto()
|
||||
|
||||
punctuation = set(['!', '?', '…', ',', '.', '-'," "])
|
||||
METHODS = dict()
|
||||
|
||||
def get_method(name:str)->Callable:
|
||||
@ -76,7 +76,10 @@ def split(todo_text):
|
||||
# 不切
|
||||
@register_method("cut0")
def cut0(inp):
    """Return the input unsplit, unless it contains nothing but punctuation.

    Args:
        inp: The text to (not) split.

    Returns:
        ``inp`` unchanged when it contains at least one character outside the
        module-level ``punctuation`` set; otherwise a single newline. An
        empty ``inp`` is also treated as punctuation-only, because the empty
        set is a subset of every set.
    """
    if set(inp).issubset(punctuation):
        # A real newline, not the two characters "/" + "n": presumably this
        # lets the caller's newline split see only empty segments and reject
        # the input -- confirm against the caller.
        return "\n"
    return inp
|
||||
|
||||
|
||||
# 凑四句一切
|
||||
@ -93,6 +96,7 @@ def cut1(inp):
|
||||
opts.append("".join(inps[split_idx[idx]: split_idx[idx + 1]]))
|
||||
else:
|
||||
opts = [inp]
|
||||
opts = [item for item in opts if not set(item).issubset(punctuation)]
|
||||
return "\n".join(opts)
|
||||
|
||||
# 凑50字一切
|
||||
@ -118,19 +122,24 @@ def cut2(inp):
|
||||
if len(opts) > 1 and len(opts[-1]) < 50: ##如果最后一个太短了,和前一个合一起
|
||||
opts[-2] = opts[-2] + opts[-1]
|
||||
opts = opts[:-1]
|
||||
opts = [item for item in opts if not set(item).issubset(punctuation)]
|
||||
return "\n".join(opts)
|
||||
|
||||
# 按中文句号。切
|
||||
@register_method("cut3")
def cut3(inp):
    """Split the text on the Chinese full stop and join the parts with newlines.

    Args:
        inp: The text to split.

    Returns:
        The segments between full stops, joined by ``"\\n"``. Segments that
        are empty or made up solely of characters from the module-level
        ``punctuation`` set are dropped, so the result may be an empty string.
    """
    # Strip surrounding newlines first, then surrounding full stops, so that
    # leading/trailing separators do not produce empty edge segments.
    trimmed = inp.strip("\n").strip("。")
    segments = trimmed.split("。")
    # set("") is a subset of any set, so empty segments are filtered as well.
    kept = [seg for seg in segments if not set(seg).issubset(punctuation)]
    return "\n".join(kept)
|
||||
|
||||
#按英文句号.切
|
||||
@register_method("cut4")
def cut4(inp):
    """Split the text on the ASCII period and join the parts with newlines.

    Args:
        inp: The text to split.

    Returns:
        The segments between periods, joined by ``"\\n"``. Segments that are
        empty or made up solely of characters from the module-level
        ``punctuation`` set are dropped, so the result may be an empty string.
    """
    # Strip surrounding newlines first, then surrounding periods, so that
    # leading/trailing separators do not produce empty edge segments.
    trimmed = inp.strip("\n").strip(".")
    segments = trimmed.split(".")
    # set("") is a subset of any set, so empty segments are filtered as well.
    kept = [seg for seg in segments if not set(seg).issubset(punctuation)]
    return "\n".join(kept)
|
||||
|
||||
# 按标点符号切
|
||||
# contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py
|
||||
@ -146,8 +155,9 @@ def cut5(inp):
|
||||
# 在句子不存在符号或句尾无符号的时候保证文本完整
|
||||
if len(items)%2 == 1:
|
||||
mergeitems.append(items[-1])
|
||||
opt = "\n".join(mergeitems)
|
||||
return opt
|
||||
opts = [item for item in mergeitems if not set(item).issubset(punctuation)]
|
||||
opts = "\n".join(opts)
|
||||
return opts
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user