fix: 移除多余 my_utils.py (#1189) (#1251)

* fix: 移除多余 my_utils.py (#1189)

* feat: update my_utils
This commit is contained in:
aoguai 2024-06-29 22:57:01 +08:00 committed by GitHub
parent a208698e77
commit 582ba7d519
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 6 additions and 27 deletions

View File

@ -65,7 +65,7 @@ from text import cleaned_text_to_sequence
from text.cleaner import clean_text
from time import time as ttime
from module.mel_processing import spectrogram_torch
from my_utils import load_audio
from tools.my_utils import load_audio
from tools.i18n.i18n import I18nAuto
i18n = I18nAuto()

View File

@ -17,7 +17,7 @@ from functools import lru_cache
import requests
from scipy.io import wavfile
from io import BytesIO
from my_utils import load_audio
from tools.my_utils import load_audio
# ZeroDivisionError fixed by Tybost (https://github.com/RVC-Boss/GPT-SoVITS/issues/79)
class TextAudioSpeakerLoader(torch.utils.data.Dataset):

View File

@ -1,21 +0,0 @@
import ffmpeg
import numpy as np
def load_audio(file, sr):
    """Decode an audio file to a mono float32 waveform at sample rate ``sr``.

    The path is first cleaned of stray leading/trailing spaces, double
    quotes and newlines, then decoded by an ``ffmpeg`` subprocess that
    down-mixes to one channel and resamples to ``sr``.

    Args:
        file: Path to the audio file (may carry pasted quotes/whitespace).
        sr: Target sample rate passed to ffmpeg as ``ar``.

    Returns:
        A 1-D ``numpy.float32`` array built from ffmpeg's raw ``f32le`` output.

    Raises:
        RuntimeError: If cleaning the path or running ffmpeg fails for any
            reason; the original exception is attached as ``__cause__``.
    """
    try:
        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
        # This launches a subprocess to decode audio while down-mixing and
        # resampling as necessary.
        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
        file = (
            file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )  # Guard against pasted paths with stray spaces, double quotes or newlines at the ends.
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except Exception as e:
        # Chain explicitly so the underlying failure is kept as the cause.
        raise RuntimeError(f"Failed to load audio: {e}") from e

    return np.frombuffer(out, np.float32).flatten()

View File

@ -9,7 +9,7 @@ cnhubert.cnhubert_base_path=cnhubert_base_path
ssl_model = cnhubert.get_model()
from text import cleaned_text_to_sequence
import soundfile
from my_utils import load_audio
from tools.my_utils import load_audio
import os
import json

View File

@ -17,7 +17,7 @@ from scipy.io import wavfile
import librosa,torch
now_dir = os.getcwd()
sys.path.append(now_dir)
from my_utils import load_audio
from tools.my_utils import load_audio
# from config import cnhubert_base_path
# cnhubert.cnhubert_base_path=cnhubert_base_path

2
api.py
View File

@ -143,7 +143,7 @@ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
from text import cleaned_text_to_sequence
from text.cleaner import clean_text
from module.mel_processing import spectrogram_torch
from my_utils import load_audio
from tools.my_utils import load_audio
import config as global_config
import logging
import subprocess

View File

@ -3,7 +3,7 @@ import traceback
from scipy.io import wavfile
# parent_directory = os.path.dirname(os.path.abspath(__file__))
# sys.path.append(parent_directory)
from my_utils import load_audio
from tools.my_utils import load_audio
from slicer2 import Slicer
def slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,i_part,all_part):