added new fixed code
This commit is contained in:
@@ -4,29 +4,21 @@ import librosa
|
||||
from torch.utils.data import Dataset
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
|
||||
HOP = 512  # passed as hop_length to librosa.stft in audio_to_logmag
|
||||
N_FFT = 1024  # passed as n_fft to librosa.stft in audio_to_logmag
|
||||
DURATION = 2.0  # clip length; multiplied by SR to get the clip length in samples
|
||||
SR = 44100  # sample rate; used to convert a clip duration into a sample count
|
||||
|
||||
|
||||
def audio_to_logmag(audio):
    """Return the log-compressed STFT magnitude of ``audio``.

    The signal is transformed with ``librosa.stft`` using the module-level
    ``N_FFT`` and ``HOP`` settings, the phase is discarded by taking the
    absolute value, and the magnitude is compressed with ``log(1 + x)``.

    No channel axis is added here. NOTE(review): for a 1-D signal
    ``librosa.stft`` is documented to return ``(1 + N_FFT // 2, frames)``,
    so a leading axis of size 1 has to be added by the caller if one is
    needed -- confirm against the call sites.
    """
    spectrum = librosa.stft(audio, hop_length=HOP, n_fft=N_FFT)
    # log1p maps a zero magnitude to exactly 0 instead of -inf.
    return np.log1p(np.abs(spectrum))
|
||||
from settings import SR, N_FFT
|
||||
from misc import audio_to_logmag
|
||||
|
||||
|
||||
class WaveformDataset(Dataset):
|
||||
def __init__(self, lossy_dir, clean_dir, sr=SR, segment_sec=4):
|
||||
self.cache = dict()
|
||||
mean = np.zeros([N_FFT // 2 + 1])
|
||||
std = np.ones([N_FFT // 2 + 1])
|
||||
|
||||
# Duration is a very important parameter; read cavemanml.py to see how and why to adjust it.
|
||||
# For the purposes of this file, it's the length of the audio clip being selected from the dataset.
|
||||
def __init__(self, lossy_dir, clean_dir, segment_duration, sr=SR):
|
||||
self.segment_duration = segment_duration
|
||||
self.sr = sr
|
||||
self.lossy_dir = lossy_dir
|
||||
self.clean_dir = clean_dir
|
||||
self.segment_len = int(segment_sec * sr)
|
||||
self.lossy_files = sorted(os.listdir(lossy_dir))
|
||||
self.clean_files = sorted(os.listdir(clean_dir))
|
||||
self.file_pairs = [
|
||||
@@ -51,9 +43,9 @@ class WaveformDataset(Dataset):
|
||||
min_len = min(len(lossy), len(clean))
|
||||
lossy, clean = lossy[:min_len], clean[:min_len]
|
||||
|
||||
# Random 2-second clip
|
||||
# Random clip
|
||||
|
||||
clip_len = int(DURATION * SR)
|
||||
clip_len = int(self.segment_duration * SR)
|
||||
if min_len < clip_len:
|
||||
# pad if too short
|
||||
lossy = np.pad(lossy, (0, clip_len - min_len))
|
||||
@@ -61,14 +53,21 @@ class WaveformDataset(Dataset):
|
||||
start = 0
|
||||
else:
|
||||
start = random.randint(0, min_len - clip_len)
|
||||
# start = 0
|
||||
lossy = lossy[start : start + clip_len]
|
||||
clean = clean[start : start + clip_len]
|
||||
|
||||
logmag_x = audio_to_logmag(lossy)
|
||||
logmag_y = audio_to_logmag(clean)
|
||||
|
||||
logmag_x_norm = (logmag_x - self.mean[:, None]) / (self.std[:, None] + 1e-8)
|
||||
logmag_y_norm = (logmag_y - self.mean[:, None]) / (self.std[:, None] + 1e-8)
|
||||
|
||||
ans = (
|
||||
torch.from_numpy(audio_to_logmag(lossy)).unsqueeze(0),
|
||||
torch.from_numpy(audio_to_logmag(clean)).unsqueeze(0),
|
||||
torch.from_numpy(logmag_x_norm).float().unsqueeze(0),
|
||||
torch.from_numpy(logmag_y_norm).float().unsqueeze(0),
|
||||
)
|
||||
|
||||
self.cache[idx] = ans
|
||||
# self.cache[idx] = ans
|
||||
|
||||
return ans
|
||||
|
||||
Reference in New Issue
Block a user