mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-08-15 05:21:57 +08:00
remove duplicate spaces
This commit is contained in:
parent
ed96ffd752
commit
05bcf9c859
@ -121,70 +121,70 @@ class TextPreprocessor:
|
|||||||
|
|
||||||
def get_phones_and_bert(self, text: str, language: str, version: str, final: bool = False):
|
def get_phones_and_bert(self, text: str, language: str, version: str, final: bool = False):
|
||||||
with self.bert_lock:
|
with self.bert_lock:
|
||||||
if language in {"all_zh", "all_yue", "all_ja", "all_ko", "zh", "ja", "ko", "yue", "en", "auto", "auto_yue"}:
|
text = re.sub(r' {2,}', ' ', text)
|
||||||
textlist = []
|
textlist = []
|
||||||
langlist = []
|
langlist = []
|
||||||
if language == "all_zh":
|
if language == "all_zh":
|
||||||
for tmp in LangSegmenter.getTexts(text,"zh"):
|
for tmp in LangSegmenter.getTexts(text,"zh"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_yue":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"zh"):
|
||||||
|
if tmp["lang"] == "zh":
|
||||||
|
tmp["lang"] = "yue"
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_ja":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"ja"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_ko":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"ko"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "en":
|
||||||
|
formattext = text
|
||||||
|
while " " in formattext:
|
||||||
|
formattext = formattext.replace(" ", " ")
|
||||||
|
langlist.append("en")
|
||||||
|
textlist.append(formattext)
|
||||||
|
elif language == "auto":
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "auto_yue":
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
if tmp["lang"] == "zh":
|
||||||
|
tmp["lang"] = "yue"
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
else:
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
if langlist:
|
||||||
|
if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
|
||||||
|
textlist[-1] += tmp["text"]
|
||||||
|
continue
|
||||||
|
if tmp["lang"] == "en":
|
||||||
langlist.append(tmp["lang"])
|
langlist.append(tmp["lang"])
|
||||||
textlist.append(tmp["text"])
|
else:
|
||||||
elif language == "all_yue":
|
# 因无法区别中日韩文汉字,以用户输入为准
|
||||||
for tmp in LangSegmenter.getTexts(text,"zh"):
|
langlist.append(language)
|
||||||
if tmp["lang"] == "zh":
|
textlist.append(tmp["text"])
|
||||||
tmp["lang"] = "yue"
|
# print(textlist)
|
||||||
langlist.append(tmp["lang"])
|
# print(langlist)
|
||||||
textlist.append(tmp["text"])
|
phones_list = []
|
||||||
elif language == "all_ja":
|
bert_list = []
|
||||||
for tmp in LangSegmenter.getTexts(text,"ja"):
|
norm_text_list = []
|
||||||
langlist.append(tmp["lang"])
|
for i in range(len(textlist)):
|
||||||
textlist.append(tmp["text"])
|
lang = langlist[i]
|
||||||
elif language == "all_ko":
|
phones, word2ph, norm_text = self.clean_text_inf(textlist[i], lang, version)
|
||||||
for tmp in LangSegmenter.getTexts(text,"ko"):
|
bert = self.get_bert_inf(phones, word2ph, norm_text, lang)
|
||||||
langlist.append(tmp["lang"])
|
phones_list.append(phones)
|
||||||
textlist.append(tmp["text"])
|
norm_text_list.append(norm_text)
|
||||||
elif language == "en":
|
bert_list.append(bert)
|
||||||
formattext = text
|
bert = torch.cat(bert_list, dim=1)
|
||||||
while " " in formattext:
|
phones = sum(phones_list, [])
|
||||||
formattext = formattext.replace(" ", " ")
|
norm_text = "".join(norm_text_list)
|
||||||
langlist.append("en")
|
|
||||||
textlist.append(formattext)
|
|
||||||
elif language == "auto":
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
elif language == "auto_yue":
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
if tmp["lang"] == "zh":
|
|
||||||
tmp["lang"] = "yue"
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
else:
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
if langlist:
|
|
||||||
if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
|
|
||||||
textlist[-1] += tmp["text"]
|
|
||||||
continue
|
|
||||||
if tmp["lang"] == "en":
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
else:
|
|
||||||
# 因无法区别中日韩文汉字,以用户输入为准
|
|
||||||
langlist.append(language)
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
# print(textlist)
|
|
||||||
# print(langlist)
|
|
||||||
phones_list = []
|
|
||||||
bert_list = []
|
|
||||||
norm_text_list = []
|
|
||||||
for i in range(len(textlist)):
|
|
||||||
lang = langlist[i]
|
|
||||||
phones, word2ph, norm_text = self.clean_text_inf(textlist[i], lang, version)
|
|
||||||
bert = self.get_bert_inf(phones, word2ph, norm_text, lang)
|
|
||||||
phones_list.append(phones)
|
|
||||||
norm_text_list.append(norm_text)
|
|
||||||
bert_list.append(bert)
|
|
||||||
bert = torch.cat(bert_list, dim=1)
|
|
||||||
phones = sum(phones_list, [])
|
|
||||||
norm_text = "".join(norm_text_list)
|
|
||||||
|
|
||||||
if not final and len(phones) < 6:
|
if not final and len(phones) < 6:
|
||||||
return self.get_phones_and_bert("." + text, language, version, final=True)
|
return self.get_phones_and_bert("." + text, language, version, final=True)
|
||||||
|
@ -586,70 +586,70 @@ from text import chinese
|
|||||||
|
|
||||||
|
|
||||||
def get_phones_and_bert(text, language, version, final=False):
|
def get_phones_and_bert(text, language, version, final=False):
|
||||||
if language in {"all_zh", "all_yue", "all_ja", "all_ko", "zh", "ja", "ko", "yue", "en", "auto", "auto_yue"}:
|
text = re.sub(r' {2,}', ' ', text)
|
||||||
textlist = []
|
textlist = []
|
||||||
langlist = []
|
langlist = []
|
||||||
if language == "all_zh":
|
if language == "all_zh":
|
||||||
for tmp in LangSegmenter.getTexts(text,"zh"):
|
for tmp in LangSegmenter.getTexts(text,"zh"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_yue":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"zh"):
|
||||||
|
if tmp["lang"] == "zh":
|
||||||
|
tmp["lang"] = "yue"
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_ja":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"ja"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_ko":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"ko"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "en":
|
||||||
|
formattext = text
|
||||||
|
while " " in formattext:
|
||||||
|
formattext = formattext.replace(" ", " ")
|
||||||
|
langlist.append("en")
|
||||||
|
textlist.append(formattext)
|
||||||
|
elif language == "auto":
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "auto_yue":
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
if tmp["lang"] == "zh":
|
||||||
|
tmp["lang"] = "yue"
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
else:
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
if langlist:
|
||||||
|
if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
|
||||||
|
textlist[-1] += tmp["text"]
|
||||||
|
continue
|
||||||
|
if tmp["lang"] == "en":
|
||||||
langlist.append(tmp["lang"])
|
langlist.append(tmp["lang"])
|
||||||
textlist.append(tmp["text"])
|
else:
|
||||||
elif language == "all_yue":
|
# 因无法区别中日韩文汉字,以用户输入为准
|
||||||
for tmp in LangSegmenter.getTexts(text,"zh"):
|
langlist.append(language)
|
||||||
if tmp["lang"] == "zh":
|
textlist.append(tmp["text"])
|
||||||
tmp["lang"] = "yue"
|
print(textlist)
|
||||||
langlist.append(tmp["lang"])
|
print(langlist)
|
||||||
textlist.append(tmp["text"])
|
phones_list = []
|
||||||
elif language == "all_ja":
|
bert_list = []
|
||||||
for tmp in LangSegmenter.getTexts(text,"ja"):
|
norm_text_list = []
|
||||||
langlist.append(tmp["lang"])
|
for i in range(len(textlist)):
|
||||||
textlist.append(tmp["text"])
|
lang = langlist[i]
|
||||||
elif language == "all_ko":
|
phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
|
||||||
for tmp in LangSegmenter.getTexts(text,"ko"):
|
bert = get_bert_inf(phones, word2ph, norm_text, lang)
|
||||||
langlist.append(tmp["lang"])
|
phones_list.append(phones)
|
||||||
textlist.append(tmp["text"])
|
norm_text_list.append(norm_text)
|
||||||
elif language == "en":
|
bert_list.append(bert)
|
||||||
formattext = text
|
bert = torch.cat(bert_list, dim=1)
|
||||||
while " " in formattext:
|
phones = sum(phones_list, [])
|
||||||
formattext = formattext.replace(" ", " ")
|
norm_text = "".join(norm_text_list)
|
||||||
langlist.append("en")
|
|
||||||
textlist.append(formattext)
|
|
||||||
elif language == "auto":
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
elif language == "auto_yue":
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
if tmp["lang"] == "zh":
|
|
||||||
tmp["lang"] = "yue"
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
else:
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
if langlist:
|
|
||||||
if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
|
|
||||||
textlist[-1] += tmp["text"]
|
|
||||||
continue
|
|
||||||
if tmp["lang"] == "en":
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
else:
|
|
||||||
# 因无法区别中日韩文汉字,以用户输入为准
|
|
||||||
langlist.append(language)
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
print(textlist)
|
|
||||||
print(langlist)
|
|
||||||
phones_list = []
|
|
||||||
bert_list = []
|
|
||||||
norm_text_list = []
|
|
||||||
for i in range(len(textlist)):
|
|
||||||
lang = langlist[i]
|
|
||||||
phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
|
|
||||||
bert = get_bert_inf(phones, word2ph, norm_text, lang)
|
|
||||||
phones_list.append(phones)
|
|
||||||
norm_text_list.append(norm_text)
|
|
||||||
bert_list.append(bert)
|
|
||||||
bert = torch.cat(bert_list, dim=1)
|
|
||||||
phones = sum(phones_list, [])
|
|
||||||
norm_text = "".join(norm_text_list)
|
|
||||||
|
|
||||||
if not final and len(phones) < 6:
|
if not final and len(phones) < 6:
|
||||||
return get_phones_and_bert("." + text, language, version, final=True)
|
return get_phones_and_bert("." + text, language, version, final=True)
|
||||||
|
122
api.py
122
api.py
@ -532,68 +532,68 @@ from text import chinese
|
|||||||
|
|
||||||
|
|
||||||
def get_phones_and_bert(text, language, version, final=False):
|
def get_phones_and_bert(text, language, version, final=False):
|
||||||
if language in {"all_zh", "all_yue", "all_ja", "all_ko", "zh", "ja", "ko", "yue", "en", "auto", "auto_yue"}:
|
text = re.sub(r' {2,}', ' ', text)
|
||||||
textlist = []
|
textlist = []
|
||||||
langlist = []
|
langlist = []
|
||||||
if language == "all_zh":
|
if language == "all_zh":
|
||||||
for tmp in LangSegmenter.getTexts(text,"zh"):
|
for tmp in LangSegmenter.getTexts(text,"zh"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_yue":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"zh"):
|
||||||
|
if tmp["lang"] == "zh":
|
||||||
|
tmp["lang"] = "yue"
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_ja":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"ja"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "all_ko":
|
||||||
|
for tmp in LangSegmenter.getTexts(text,"ko"):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "en":
|
||||||
|
formattext = text
|
||||||
|
while " " in formattext:
|
||||||
|
formattext = formattext.replace(" ", " ")
|
||||||
|
langlist.append("en")
|
||||||
|
textlist.append(formattext)
|
||||||
|
elif language == "auto":
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
elif language == "auto_yue":
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
if tmp["lang"] == "zh":
|
||||||
|
tmp["lang"] = "yue"
|
||||||
|
langlist.append(tmp["lang"])
|
||||||
|
textlist.append(tmp["text"])
|
||||||
|
else:
|
||||||
|
for tmp in LangSegmenter.getTexts(text):
|
||||||
|
if langlist:
|
||||||
|
if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
|
||||||
|
textlist[-1] += tmp["text"]
|
||||||
|
continue
|
||||||
|
if tmp["lang"] == "en":
|
||||||
langlist.append(tmp["lang"])
|
langlist.append(tmp["lang"])
|
||||||
textlist.append(tmp["text"])
|
else:
|
||||||
elif language == "all_yue":
|
# 因无法区别中日韩文汉字,以用户输入为准
|
||||||
for tmp in LangSegmenter.getTexts(text,"zh"):
|
langlist.append(language)
|
||||||
if tmp["lang"] == "zh":
|
textlist.append(tmp["text"])
|
||||||
tmp["lang"] = "yue"
|
phones_list = []
|
||||||
langlist.append(tmp["lang"])
|
bert_list = []
|
||||||
textlist.append(tmp["text"])
|
norm_text_list = []
|
||||||
elif language == "all_ja":
|
for i in range(len(textlist)):
|
||||||
for tmp in LangSegmenter.getTexts(text,"ja"):
|
lang = langlist[i]
|
||||||
langlist.append(tmp["lang"])
|
phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
|
||||||
textlist.append(tmp["text"])
|
bert = get_bert_inf(phones, word2ph, norm_text, lang)
|
||||||
elif language == "all_ko":
|
phones_list.append(phones)
|
||||||
for tmp in LangSegmenter.getTexts(text,"ko"):
|
norm_text_list.append(norm_text)
|
||||||
langlist.append(tmp["lang"])
|
bert_list.append(bert)
|
||||||
textlist.append(tmp["text"])
|
bert = torch.cat(bert_list, dim=1)
|
||||||
elif language == "en":
|
phones = sum(phones_list, [])
|
||||||
formattext = text
|
norm_text = "".join(norm_text_list)
|
||||||
while " " in formattext:
|
|
||||||
formattext = formattext.replace(" ", " ")
|
|
||||||
langlist.append("en")
|
|
||||||
textlist.append(formattext)
|
|
||||||
elif language == "auto":
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
elif language == "auto_yue":
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
if tmp["lang"] == "zh":
|
|
||||||
tmp["lang"] = "yue"
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
else:
|
|
||||||
for tmp in LangSegmenter.getTexts(text):
|
|
||||||
if langlist:
|
|
||||||
if (tmp["lang"] == "en" and langlist[-1] == "en") or (tmp["lang"] != "en" and langlist[-1] != "en"):
|
|
||||||
textlist[-1] += tmp["text"]
|
|
||||||
continue
|
|
||||||
if tmp["lang"] == "en":
|
|
||||||
langlist.append(tmp["lang"])
|
|
||||||
else:
|
|
||||||
# 因无法区别中日韩文汉字,以用户输入为准
|
|
||||||
langlist.append(language)
|
|
||||||
textlist.append(tmp["text"])
|
|
||||||
phones_list = []
|
|
||||||
bert_list = []
|
|
||||||
norm_text_list = []
|
|
||||||
for i in range(len(textlist)):
|
|
||||||
lang = langlist[i]
|
|
||||||
phones, word2ph, norm_text = clean_text_inf(textlist[i], lang, version)
|
|
||||||
bert = get_bert_inf(phones, word2ph, norm_text, lang)
|
|
||||||
phones_list.append(phones)
|
|
||||||
norm_text_list.append(norm_text)
|
|
||||||
bert_list.append(bert)
|
|
||||||
bert = torch.cat(bert_list, dim=1)
|
|
||||||
phones = sum(phones_list, [])
|
|
||||||
norm_text = "".join(norm_text_list)
|
|
||||||
|
|
||||||
if not final and len(phones) < 6:
|
if not final and len(phones) < 6:
|
||||||
return get_phones_and_bert("." + text, language, version, final=True)
|
return get_phones_and_bert("." + text, language, version, final=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user