import numpy as np
import soundfile as sf


def load_audio_mono_16k(path: str):
    """Load an audio file as mono float32 samples at 16 kHz.

    The file is read with ``soundfile`` as float32. Multi-channel audio is
    down-mixed by averaging the channels. If the file's sample rate is not
    16000 Hz, the signal is resampled with plain linear interpolation
    (``np.interp``); no anti-aliasing filter is applied, so this is a
    lightweight, low-fidelity resampler.

    Args:
        path: Path of the audio file, passed straight to ``soundfile.read``.

    Returns:
        A ``(wav, sr)`` tuple where ``wav`` is a 1-D float32 array and ``sr``
        is always 16000.
    """
    target_sr = 16000

    wav, sr = sf.read(path, dtype="float32", always_2d=False)

    # With always_2d=False a multi-channel file comes back 2-D; average over
    # axis 1 to collapse it to mono.
    if wav.ndim == 2:
        wav = wav.mean(axis=1)

    if sr == target_sr:
        return wav, sr

    n_in = len(wav)
    if n_in == 0:
        # np.interp raises ValueError on an empty sample grid, so an empty
        # recording is returned as an empty array instead of crashing.
        return np.zeros(0, dtype="float32"), target_sr

    # Integer arithmetic gives the exact floor(n_in * target_sr / sr); the
    # float product can land just below an integer and drop one sample.
    n_out = n_in * target_sr // sr

    # Both grids span [0, 1) so that sample i of the input sits at i / n_in
    # and sample j of the output at j / n_out.
    x_old = np.linspace(0, 1, n_in, endpoint=False)
    x_new = np.linspace(0, 1, n_out, endpoint=False)
    wav = np.interp(x_new, x_old, wav).astype("float32")

    return wav, target_sr