fix: prevent concurrent access to BERT model with thread lock

Added a reentrant thread lock (`threading.RLock`) to protect the `get_phones_and_bert` method from race conditions during concurrent access. This addresses issue #1844, where multiple threads accessing the BERT model simultaneously could cause data inconsistency or crashes.
This commit is contained in:
lishq 2025-03-07 16:28:21 +08:00
parent 165882d64f
commit 853ff8a089

View File

@@ -1,5 +1,6 @@
import os, sys
import threading
from tqdm import tqdm
now_dir = os.getcwd()
@@ -54,6 +55,7 @@ class TextPreprocessor:
self.bert_model = bert_model
self.tokenizer = tokenizer
self.device = device
self.bert_lock = threading.RLock()
def preprocess(self, text:str, lang:str, text_split_method:str, version:str="v2")->List[Dict]:
print(f'############ {i18n("切分文本")} ############')
@@ -117,6 +119,7 @@ class TextPreprocessor:
return self.get_phones_and_bert(text, language, version)
def get_phones_and_bert(self, text:str, language:str, version:str, final:bool=False):
with self.bert_lock:
if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
language = language.replace("all_","")
formattext = text