Upload files to "/"

2026-01-07 12:19:00 +01:00
parent 3b9adb1ee8
commit 478e8e2971
4 changed files with 411 additions and 0 deletions
--- a/caveman_wavedataset.py
+++ b/caveman_wavedataset.py
@@ -0,0 +1,74 @@
+import os
+import torch
+import librosa
+from torch.utils.data import Dataset
+import numpy as np
+import random
+
+
+# STFT hop length in samples (passed as hop_length to librosa.stft).
+HOP = 512
+# STFT window size in samples (passed as n_fft to librosa.stft).
+N_FFT = 1024
+# Length, in seconds, of the clip cropped from each lossy/clean file pair.
+DURATION = 2.0
+# Default sample rate in Hz; used as WaveformDataset's default ``sr`` and
+# multiplied by DURATION to get the clip length in samples.
+SR = 44100
+
+
+def audio_to_logmag(audio, n_fft=N_FFT, hop_length=HOP):
+    """Return the log-compressed STFT magnitude of a waveform.
+
+    Args:
+        audio: Waveform samples as a NumPy array; handed unchanged to
+            ``librosa.stft``.
+        n_fft: FFT window size in samples. Defaults to the module-level
+            ``N_FFT``.
+        hop_length: Number of samples between successive frames. Defaults
+            to the module-level ``HOP``.
+
+    Returns:
+        ``log(1 + |STFT(audio)|)`` as a NumPy array. For mono (1-D) input
+        librosa documents the STFT shape as ``(1 + n_fft // 2, n_frames)``,
+        i.e. ``(513, T)`` with the default ``n_fft``. No channel axis is
+        added here; callers that need one must add it themselves.
+    """
+    spectrum = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
+    # log1p maps a zero magnitude to exactly 0 instead of -inf.
+    return np.log1p(np.abs(spectrum))
+
+
+class WaveformDataset(Dataset):
+    """Paired lossy/clean audio clips served as log-magnitude spectrograms.
+
+    Files are paired by identical file name across ``lossy_dir`` and
+    ``clean_dir``; a name that exists in only one directory is ignored.
+    """
+
+    def __init__(self, lossy_dir, clean_dir, sr=SR, segment_sec=4):
+        """Index the file names shared by both directories.
+
+        Args:
+            lossy_dir: Directory holding the degraded audio files.
+            clean_dir: Directory holding the reference audio files.
+            sr: Sample rate every file is loaded at.
+            segment_sec: Stored as ``segment_len`` (in samples). It is not
+                used when cropping; clips are ``DURATION`` seconds long.
+        """
+        super().__init__()
+        # idx -> (lossy, clean) tensors already produced for that index.
+        self.cache = {}
+        self.sr = sr
+        self.lossy_dir = lossy_dir
+        self.clean_dir = clean_dir
+        self.segment_len = int(segment_sec * sr)
+        self.lossy_files = sorted(os.listdir(lossy_dir))
+        self.clean_files = sorted(os.listdir(clean_dir))
+        # Build the lookup set once instead of once per candidate name.
+        clean_names = set(self.clean_files)
+        self.file_pairs = [
+            (name, name) for name in self.lossy_files if name in clean_names
+        ]
+
+    def __len__(self):
+        """Return the number of file names present in both directories."""
+        return len(self.file_pairs)
+
+    def __getitem__(self, idx):
+        """Return ``(lossy, clean)`` log-magnitude tensors for pair ``idx``.
+
+        Each tensor is the output of ``audio_to_logmag`` with a leading
+        axis of size 1 added by ``unsqueeze(0)``.
+
+        NOTE: results are memoised in ``self.cache``, so the random crop
+        chosen on the first access of an index is what every later access
+        of that index returns from this dataset instance.
+
+        Raises:
+            IndexError: If ``idx`` is outside ``range(len(self))`` (after
+                normal negative-index handling of the pair list).
+        """
+        if idx in self.cache:
+            return self.cache[idx]
+
+        # Index the paired list (not the raw directory listings) so that
+        # both paths always refer to the same file name and the valid
+        # index range agrees with __len__.
+        lossy_name, clean_name = self.file_pairs[idx]
+        lossy_path = os.path.join(self.lossy_dir, lossy_name)
+        clean_path = os.path.join(self.clean_dir, clean_name)
+
+        # Load both signals as mono at the dataset's sample rate.
+        lossy, _ = librosa.load(lossy_path, sr=self.sr, mono=True)
+        clean, _ = librosa.load(clean_path, sr=self.sr, mono=True)
+
+        # Truncate to the shorter signal so both cover the same span.
+        min_len = min(len(lossy), len(clean))
+        lossy, clean = lossy[:min_len], clean[:min_len]
+
+        # The audio was loaded at self.sr, so the clip length in samples
+        # must be derived from self.sr rather than the module default.
+        clip_len = int(DURATION * self.sr)
+        if min_len < clip_len:
+            # Too short: zero-pad the tail up to the clip length.
+            tail_pad = (0, clip_len - min_len)
+            lossy = np.pad(lossy, tail_pad)
+            clean = np.pad(clean, tail_pad)
+        else:
+            # Same random window for both signals keeps them aligned.
+            start = random.randint(0, min_len - clip_len)
+            lossy = lossy[start : start + clip_len]
+            clean = clean[start : start + clip_len]
+
+        ans = (
+            torch.from_numpy(audio_to_logmag(lossy)).unsqueeze(0),
+            torch.from_numpy(audio_to_logmag(clean)).unsqueeze(0),
+        )
+
+        self.cache[idx] = ans
+
+        return ans