diff --git a/GPT_SoVITS/text/LangSegmenter/langsegmenter.py b/GPT_SoVITS/text/LangSegmenter/langsegmenter.py
index cca5bf2..c558348 100644
--- a/GPT_SoVITS/text/LangSegmenter/langsegmenter.py
+++ b/GPT_SoVITS/text/LangSegmenter/langsegmenter.py
@@ -8,66 +8,7 @@ jieba.setLogLevel(logging.CRITICAL)
 # 更改fast_langdetect大模型位置
 from pathlib import Path
 import fast_langdetect
-fast_langdetect.ft_detect.infer.CACHE_DIRECTORY = Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"
-
-# 防止win下无法读取模型
-import os
-from typing import Optional
-def load_fasttext_model(
-    model_path: Path,
-    download_url: Optional[str] = None,
-    proxy: Optional[str] = None,
-):
-    """
-    Load a FastText model, downloading it if necessary.
-    :param model_path: Path to the FastText model file
-    :param download_url: URL to download the model from
-    :param proxy: Proxy URL for downloading the model
-    :return: FastText model
-    :raises DetectError: If model loading fails
-    """
-    if all([
-        fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL,
-        model_path.exists(),
-        model_path.name == fast_langdetect.ft_detect.infer.FASTTEXT_LARGE_MODEL_NAME,
-    ]):
-        if not fast_langdetect.ft_detect.infer.verify_md5(model_path, fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL):
-            fast_langdetect.ft_detect.infer.logger.warning(
-                f"fast-langdetect: MD5 hash verification failed for {model_path}, "
-                f"please check the integrity of the downloaded file from {fast_langdetect.ft_detect.infer.FASTTEXT_LARGE_MODEL_URL}. "
-                "\n This may seriously reduce the prediction accuracy. "
-                "If you want to ignore this, please set `fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL = None` "
-            )
-    if not model_path.exists():
-        if download_url:
-            fast_langdetect.ft_detect.infer.download_model(download_url, model_path, proxy)
-        if not model_path.exists():
-            raise fast_langdetect.ft_detect.infer.DetectError(f"FastText model file not found at {model_path}")
-
-    try:
-        # Load FastText model
-        if (re.match(r'^[A-Za-z0-9_/\\:.]*$', str(model_path))):
-            model = fast_langdetect.ft_detect.infer.fasttext.load_model(str(model_path))
-        else:
-            python_dir = os.getcwd()
-            if (str(model_path)[:len(python_dir)].upper() == python_dir.upper()):
-                model = fast_langdetect.ft_detect.infer.fasttext.load_model(os.path.relpath(model_path, python_dir))
-            else:
-                import tempfile
-                import shutil
-                with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
-                    shutil.copyfile(model_path, tmpfile.name)
-
-                model = fast_langdetect.ft_detect.infer.fasttext.load_model(tmpfile.name)
-                os.unlink(tmpfile.name)
-        return model
-
-    except Exception as e:
-        fast_langdetect.ft_detect.infer.logger.warning(f"fast-langdetect:Failed to load FastText model from {model_path}: {e}")
-        raise fast_langdetect.ft_detect.infer.DetectError(f"Failed to load FastText model: {e}")
-
-if os.name == 'nt':
-    fast_langdetect.ft_detect.infer.load_fasttext_model = load_fasttext_model
+fast_langdetect.infer._default_detector = fast_langdetect.infer.LangDetector(fast_langdetect.infer.LangDetectConfig(cache_dir=Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"))
 
 from split_lang import LangSplitter
 
diff --git a/GPT_SoVITS/text/japanese.py b/GPT_SoVITS/text/japanese.py
index d815ef4..e023ce7 100644
--- a/GPT_SoVITS/text/japanese.py
+++ b/GPT_SoVITS/text/japanese.py
@@ -10,7 +10,7 @@ try:
     if os.name == 'nt':
         python_dir = os.getcwd()
         OPEN_JTALK_DICT_DIR = pyopenjtalk.OPEN_JTALK_DICT_DIR.decode("utf-8")
-        if not (re.match(r'^[A-Za-z0-9_/\\:.]*$', OPEN_JTALK_DICT_DIR)):
+        if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', OPEN_JTALK_DICT_DIR)):
             if (OPEN_JTALK_DICT_DIR[:len(python_dir)].upper() == python_dir.upper()):
                 OPEN_JTALK_DICT_DIR = os.path.join(os.path.relpath(OPEN_JTALK_DICT_DIR,python_dir))
             else:
@@ -25,7 +25,7 @@ try:
                 OPEN_JTALK_DICT_DIR = os.path.join("TEMP", "ja", "open_jtalk_dic")
         pyopenjtalk.OPEN_JTALK_DICT_DIR = OPEN_JTALK_DICT_DIR.encode("utf-8")
 
-        if not (re.match(r'^[A-Za-z0-9_/\\:.]*$', current_file_path)):
+        if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', current_file_path)):
             if (current_file_path[:len(python_dir)].upper() == python_dir.upper()):
                 current_file_path = os.path.join(os.path.relpath(current_file_path,python_dir))
             else:
diff --git a/GPT_SoVITS/text/korean.py b/GPT_SoVITS/text/korean.py
index 79d89af..daae41f 100644
--- a/GPT_SoVITS/text/korean.py
+++ b/GPT_SoVITS/text/korean.py
@@ -19,13 +19,13 @@ if os.name == 'nt':
                 print(f'you have to install eunjeon. install it...')
             else:
                 installpath = spam_spec.submodule_search_locations[0]
-                if not (re.match(r'^[A-Za-z0-9_/\\:.]*$', installpath)):
+                if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', installpath)):
                     import sys
                     from eunjeon import Mecab as _Mecab
 
                     class Mecab(_Mecab):
                         def get_dicpath(installpath):
-                            if not (re.match(r'^[A-Za-z0-9_/\\:.]*$', installpath)):
+                            if not (re.match(r'^[A-Za-z0-9_/\\:.\-]*$', installpath)):
                                 import shutil
                                 python_dir = os.getcwd()
                                 if (installpath[:len(python_dir)].upper() == python_dir.upper()):
diff --git a/requirements.txt b/requirements.txt
index 144c729..0c0a9f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,7 +25,7 @@ psutil
 jieba_fast
 jieba
 split-lang
-fast_langdetect
+fast_langdetect>=0.3.0
 Faster_Whisper
 wordsegment
 rotary_embedding_torch