import numpy as np
import librosa

from settings import N_FFT, HOP


def audio_to_logmag(audio):
    """Convert an audio signal into a log-compressed magnitude spectrogram.

    The signal is passed through ``librosa.stft`` using the project-wide
    ``N_FFT`` window size and ``HOP`` hop length from ``settings``. The
    phase is dropped by taking the absolute value, and the magnitudes are
    compressed with ``log(1 + x)``.

    Args:
        audio: Audio samples, forwarded unchanged to ``librosa.stft``.

    Returns:
        Array of log-magnitudes laid out as (freq_bins, time_frames).
        The original note on this function gives the shape as (513, T),
        which presumably corresponds to N_FFT == 1024 -- confirm against
        ``settings``.
    """
    spectrum = librosa.stft(audio, n_fft=N_FFT, hop_length=HOP)
    # log1p rather than log: a zero-magnitude bin maps to 0 instead of -inf.
    return np.log1p(np.abs(spectrum))