Update text_segmentation_method.py

修改按英文句号.切分方式
This commit is contained in:
Cosmo Clara 2024-10-18 01:03:28 +08:00 committed by GitHub
parent eee607b71d
commit 395f97a176
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -135,7 +135,7 @@ def cut3(inp):
@register_method("cut4")
def cut4(inp):
    """Split text into one segment per line at English periods.

    Leading/trailing newlines are stripped first, then leading/trailing
    periods. The remaining text is split at every "." that is neither
    preceded nor followed by a digit, so a decimal such as "3.14" stays
    in one piece. Note that a sentence-final period directly after a digit
    (e.g. "in 2023. Next") is also not treated as a split point.

    Args:
        inp: The text to segment.

    Returns:
        The kept segments joined with "\\n". Segments whose characters all
        belong to ``punctuation`` (including empty segments) are dropped.
    """
    inp = inp.strip("\n")
    # Split only on periods with no digit on either side; the earlier plain
    # ``str.split(".")`` result was overwritten by this one and is removed.
    opts = re.split(r'(?<!\d)\.(?!\d)', inp.strip("."))
    # ``punctuation`` is defined elsewhere in this module.
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)