from __future__ import annotations import subprocess import threading import wave from io import BytesIO import numpy as np import soundfile as sf import torch def set_scheduler_seed(seed: int): if seed in ["", None]: return seed = int(seed) if seed < 0: return np.random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) def pack_ogg(io_buffer: BytesIO, data: np.ndarray, rate: int): def handle_pack_ogg(): with sf.SoundFile(io_buffer, mode="w", samplerate=rate, channels=1, format="ogg") as audio_file: audio_file.write(data) stack_size = 4096 * 4096 try: threading.stack_size(stack_size) pack_ogg_thread = threading.Thread(target=handle_pack_ogg) pack_ogg_thread.start() pack_ogg_thread.join() except (RuntimeError, ValueError): handle_pack_ogg() return io_buffer def pack_raw(io_buffer: BytesIO, data: np.ndarray, rate: int): io_buffer.write(data.tobytes()) return io_buffer def pack_wav(io_buffer: BytesIO, data: np.ndarray, rate: int): io_buffer = BytesIO() sf.write(io_buffer, data, rate, format="wav") return io_buffer def pack_aac(io_buffer: BytesIO, data: np.ndarray, rate: int): process = subprocess.Popen( [ "ffmpeg", "-f", "s16le", "-ar", str(rate), "-ac", "1", "-i", "pipe:0", "-c:a", "aac", "-b:a", "192k", "-vn", "-f", "adts", "pipe:1", ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) out, _ = process.communicate(input=data.tobytes()) io_buffer.write(out) return io_buffer def pack_audio(io_buffer: BytesIO, data: np.ndarray, rate: int, media_type: str): if media_type == "ogg": io_buffer = pack_ogg(io_buffer, data, rate) elif media_type == "aac": io_buffer = pack_aac(io_buffer, data, rate) elif media_type == "wav": io_buffer = pack_wav(io_buffer, data, rate) else: io_buffer = pack_raw(io_buffer, data, rate) io_buffer.seek(0) return io_buffer def wave_header_chunk(frame_input=b"", channels=1, sample_width=2, sample_rate=32000): wav_buf = BytesIO() with wave.open(wav_buf, "wb") as vfout: vfout.setnchannels(channels) vfout.setsampwidth(sample_width) vfout.setframerate(sample_rate) vfout.writeframes(frame_input) wav_buf.seek(0) return wav_buf.read()