mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Update text_segmentation_method.py
第85行:修改cut1函数。如果文本的句数为11句,原函数会把句子切分为4/7而不是4/4/3,而模型处理长句子时容易出现漏字现象。第137行:对函数cut5中punds的取值做出修改:删除重复的";",新增":;"。
This commit is contained in:
parent
37a895a67d
commit
160dfbdd2c
@ -82,7 +82,8 @@ def cut1(inp):
|
||||
inp = inp.strip("\n")
|
||||
inps = split(inp)
|
||||
split_idx = list(range(0, len(inps), 4))
|
||||
split_idx[-1] = None
|
||||
# split_idx[-1] = None
|
||||
split_idx.append(None)
|
||||
if len(split_idx) > 1:
|
||||
opts = []
|
||||
for idx in range(len(split_idx) - 1):
|
||||
@ -135,7 +136,8 @@ def cut5(inp):
|
||||
# if not re.search(r'[^\w\s]', inp[-1]):
|
||||
# inp += '。'
|
||||
inp = inp.strip("\n")
|
||||
punds = r'[,.;?!、,。?!;:…]'
|
||||
# punds = r'[,.;?!、,。?!;:…]'
|
||||
punds = r'[,.;?!、,。?!;::…]'
|
||||
items = re.split(f'({punds})', inp)
|
||||
mergeitems = ["".join(group) for group in zip(items[::2], items[1::2])]
|
||||
# 在句子不存在符号或句尾无符号的时候保证文本完整
|
||||
@ -149,4 +151,4 @@ def cut5(inp):
|
||||
if __name__ == '__main__':
|
||||
method = get_method("cut5")
|
||||
print(method("你好,我是小明。你好,我是小红。你好,我是小刚。你好,我是小张。"))
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user