Point the `load_audio` import in six files at `tools.my_utils` instead of the
top-level `my_utils` module, and delete GPT_SoVITS/my_utils.py.

NOTE(review): this patch had been stored with its line breaks and runs of
spaces collapsed. The layout below was rebuilt from the hunk line counts; the
4-space indentation of the removed lines and the position of the blank context
lines are assumptions - confirm against the target files before applying.
The non-English trailing comment in the removed file is kept verbatim because
removed lines must match the target byte-for-byte; it says roughly "guard
against pasted paths that carry leading/trailing spaces, quotes and newlines".

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
index 4dc1040..44c6d0e 100644
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -65,7 +65,7 @@ from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
 from time import time as ttime
 from module.mel_processing import spectrogram_torch
-from my_utils import load_audio
+from tools.my_utils import load_audio
 from tools.i18n.i18n import I18nAuto
 
 i18n = I18nAuto()
diff --git a/GPT_SoVITS/module/data_utils.py b/GPT_SoVITS/module/data_utils.py
index ff4c4f4..72c8055 100644
--- a/GPT_SoVITS/module/data_utils.py
+++ b/GPT_SoVITS/module/data_utils.py
@@ -17,7 +17,7 @@ from functools import lru_cache
 import requests
 from scipy.io import wavfile
 from io import BytesIO
-from my_utils import load_audio
+from tools.my_utils import load_audio
 
 # ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
 class TextAudioSpeakerLoader(torch.utils.data.Dataset):
diff --git a/GPT_SoVITS/my_utils.py b/GPT_SoVITS/my_utils.py
deleted file mode 100644
index 776939d..0000000
--- a/GPT_SoVITS/my_utils.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import ffmpeg
-import numpy as np
-
-
-def load_audio(file, sr):
-    try:
-        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
-        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
-        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
-        file = (
-            file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        )  # 防止小白拷路径头尾带了空格和"和回车
-        out, _ = (
-            ffmpeg.input(file, threads=0)
-            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
-            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
-        )
-    except Exception as e:
-        raise RuntimeError(f"Failed to load audio: {e}")
-
-    return np.frombuffer(out, np.float32).flatten()
diff --git a/GPT_SoVITS/onnx_export.py b/GPT_SoVITS/onnx_export.py
index b82e987..ab457d7 100644
--- a/GPT_SoVITS/onnx_export.py
+++ b/GPT_SoVITS/onnx_export.py
@@ -9,7 +9,7 @@ cnhubert.cnhubert_base_path=cnhubert_base_path
 ssl_model = cnhubert.get_model()
 from text import cleaned_text_to_sequence
 import soundfile
-from my_utils import load_audio
+from tools.my_utils import load_audio
 import os
 import json
 
diff --git a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
index 61c933a..17394ee 100644
--- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
+++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@@ -17,7 +17,7 @@ from scipy.io import wavfile
 import librosa,torch
 now_dir = os.getcwd()
 sys.path.append(now_dir)
-from my_utils import load_audio
+from tools.my_utils import load_audio
 
 # from config import cnhubert_base_path
 # cnhubert.cnhubert_base_path=cnhubert_base_path
diff --git a/api.py b/api.py
index b534071..aa822ca 100644
--- a/api.py
+++ b/api.py
@@ -143,7 +143,7 @@ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
 from text import cleaned_text_to_sequence
 from text.cleaner import clean_text
 from module.mel_processing import spectrogram_torch
-from my_utils import load_audio
+from tools.my_utils import load_audio
 import config as global_config
 import logging
 import subprocess
diff --git a/tools/slice_audio.py b/tools/slice_audio.py
index 46ee408..8a06292 100644
--- a/tools/slice_audio.py
+++ b/tools/slice_audio.py
@@ -3,7 +3,7 @@ import traceback
 from scipy.io import wavfile
 # parent_directory = os.path.dirname(os.path.abspath(__file__))
 # sys.path.append(parent_directory)
-from my_utils import load_audio
+from tools.my_utils import load_audio
 from slicer2 import Slicer
 
 def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,i_part,all_part):