davior · davior · Nov 29, 2025 · Nov 29, 2025 · chatgpt-codex-connector · Nov 29, 2025
diff --git a/README.md b/README.md
@@ -1,6 +1,29 @@
 # V2kAudioProcessor
 Process audio to find voices in it. Used to process V2k recordings and find the audio within it.
 
+## Batch denoising CLI
+You can quickly run the spectral denoising pipeline over one or more audio files (or folders full of files) without writing code:
+
+```bash
+python -m audio.pipeline path/to/input.wav another_dir/ \
+  --output-dir denoised_outputs \
+  --preset aggressive \
+  --multiband-gate --wiener
+```
+
+- Accepts individual files and directories (searched recursively with `--glob`, default `*.wav`).
+- Preserves relative paths inside the output directory and appends a `_denoised_<preset>.wav` suffix.
+- Supports the same optional stages as the library API: multi-band gating, Wiener filtering, and spectral subtraction.
+
+The underlying functions are available from Python if you want to script your own evaluation against a set of test clips:
+
+```python
+from pathlib import Path
+from audio import process_batch
+
+outputs = process_batch([Path("tests/inputs")], Path("tests/outputs"), preset="conservative")
+for out in outputs:
+    print(out)
 ## Overview
 
 The processing pipeline loads an input file, optionally applies a manual gain

diff --git a/src/audio/__init__.py b/src/audio/__init__.py
@@ -0,0 +1,13 @@
+"""Audio processing utilities and batch helpers."""
+
+from .denoise import DenoisePreset, PRESETS, SpectralGateConfig, denoise
+from .pipeline import process_audio_file, process_batch
+
+__all__ = [
+    "SpectralGateConfig",
+    "DenoisePreset",
+    "PRESETS",
+    "denoise",
+    "process_audio_file",
+    "process_batch",
+]
diff --git a/src/audio/denoise.py b/src/audio/denoise.py
@@ -0,0 +1,209 @@
+from dataclasses import dataclass
+from typing import Dict, Optional, Tuple
+
+import librosa
+import numpy as np
+from scipy import signal
+
+
+@dataclass
+class SpectralGateConfig:
+    threshold_db: float = -30.0
+    reduction_db: float = -20.0
+    min_energy_percentile: float = 0.2
+    attack: float = 0.01  # seconds
+    release: float = 0.08  # seconds
+    n_fft: int = 1024
+    hop_length: int = 256
+
+
+@dataclass
+class DenoisePreset:
+    gain_db: float
+    bandpass: Tuple[float, float]
+    primary_gate: SpectralGateConfig
+    secondary_gate: SpectralGateConfig
+    limiter_ceiling: float
+    target_lufs: float
+
+
+PRESETS: Dict[str, DenoisePreset] = {
+    "conservative": DenoisePreset(
+        gain_db=0.0,
+        bandpass=(60.0, 18000.0),
+        primary_gate=SpectralGateConfig(
+            threshold_db=-28.0, reduction_db=-18.0, min_energy_percentile=0.25, attack=0.015, release=0.1
+        ),
+        secondary_gate=SpectralGateConfig(
+            threshold_db=-32.0, reduction_db=-22.0, min_energy_percentile=0.2, attack=0.01, release=0.08
+        ),
+        limiter_ceiling=0.98,
+        target_lufs=-16.0,
+    ),
+    "aggressive": DenoisePreset(
+        gain_db=3.0,
+        bandpass=(80.0, 16000.0),
+        primary_gate=SpectralGateConfig(
+            threshold_db=-26.0, reduction_db=-14.0, min_energy_percentile=0.35, attack=0.02, release=0.12
+        ),
+        secondary_gate=SpectralGateConfig(
+            threshold_db=-30.0, reduction_db=-26.0, min_energy_percentile=0.3, attack=0.015, release=0.1
+        ),
+        limiter_ceiling=0.96,
+        target_lufs=-15.0,
+    ),
+}
+
+
+def _db_to_amplitude(db_value: float) -> float:
+    return 10 ** (db_value / 20.0)
+
+
+def _apply_gain(audio: np.ndarray, gain_db: float) -> np.ndarray:
+    if gain_db == 0:
+        return audio
+    return audio * _db_to_amplitude(gain_db)
+
+
+def _bandpass_filter(audio: np.ndarray, sr: int, low: float, high: float, order: int = 4) -> np.ndarray:
+    nyquist = sr / 2.0
+    low = max(1.0, low)
+    high = min(high, nyquist - 1.0)
+    sos = signal.butter(order, [low / nyquist, high / nyquist], btype="bandpass", output="sos")
+    return signal.sosfiltfilt(sos, audio)
+
+
+def _estimate_noise_profile(
+    y: np.ndarray, config: SpectralGateConfig, energy_percentile: Optional[float] = None
+) -> np.ndarray:
+    percentile = energy_percentile if energy_percentile is not None else config.min_energy_percentile
+    S = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude = np.abs(S)
+    frame_energy = magnitude.mean(axis=0)
+    cutoff = np.quantile(frame_energy, percentile)
+    noise_frames = magnitude[:, frame_energy <= cutoff]
+    if noise_frames.size == 0:
+        noise_frames = magnitude
+    noise_profile = np.mean(noise_frames, axis=1)
+    return noise_profile
+
+
+def _smooth_mask(mask: np.ndarray, attack_frames: int, release_frames: int) -> np.ndarray:
+    smoothed = np.zeros_like(mask, dtype=float)
+    for freq in range(mask.shape[0]):
+        state = 0.0
+        for t in range(mask.shape[1]):
+            target = mask[freq, t]
+            coeff = 1.0 / attack_frames if target > state else 1.0 / release_frames
+            state += coeff * (target - state)
+            smoothed[freq, t] = state
+    return smoothed
+
+
+def _apply_spectral_gate(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    S = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude = np.abs(S)
+    phase = np.angle(S)
+
+    noise_profile = _estimate_noise_profile(y, config)
+    threshold = noise_profile[:, None] * _db_to_amplitude(config.threshold_db)
+
+    mask = magnitude > threshold
+    reduction = _db_to_amplitude(config.reduction_db)
+
+    hop_duration = config.hop_length / float(sr)
+    attack_frames = max(1, int(config.attack / hop_duration))
+    release_frames = max(1, int(config.release / hop_duration))
+    smoothed_mask = _smooth_mask(mask.astype(float), attack_frames, release_frames)
+
+    gated_magnitude = magnitude * (smoothed_mask + (1 - smoothed_mask) * reduction)
+    Y = gated_magnitude * np.exp(1j * phase)
+    return librosa.istft(Y, hop_length=config.hop_length, length=len(y))
+
+
+def _spectral_subtraction(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    S = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude = np.abs(S)
+    phase = np.angle(S)
+    noise_profile = _estimate_noise_profile(y, config, energy_percentile=0.4)
+    adjusted = np.maximum(magnitude - noise_profile[:, None], 0.0)
+    return librosa.istft(adjusted * np.exp(1j * phase), hop_length=config.hop_length, length=len(y))
+
+
+def _wiener_filter(y: np.ndarray, size: int = 11) -> np.ndarray:
+    return signal.wiener(y, mysize=size)
+
+
+def _multi_band_gate(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    nyquist = sr / 2.0
+    bands = [
+        (30.0, min(200.0, nyquist - 1.0)),
+        (200.0, min(2000.0, nyquist - 1.0)),
+        (2000.0, min(8000.0, nyquist - 1.0)),
+    ]
+    band_signals = []
+    for low, high in bands:
+        filtered = _bandpass_filter(y, sr, low, high)
+        gated = _apply_spectral_gate(filtered, sr, config)
+        band_signals.append(gated)
+    combined = np.sum(band_signals, axis=0) / max(len(band_signals), 1)
+    peak = np.max(np.abs(combined)) + 1e-9
+    if peak > 1.0:
+        combined /= peak
+    return combined
+
+
+def _limiter(y: np.ndarray, ceiling: float) -> np.ndarray:
+    ceiling = max(0.0, min(1.0, ceiling))
+    return np.clip(y, -ceiling, ceiling)
+
+
+def _loudness_normalize(y: np.ndarray, target_lufs: float) -> np.ndarray:
+    rms = np.sqrt(np.mean(np.square(y))) + 1e-9
+    current_lufs = 20 * np.log10(rms)
+    gain_db = target_lufs - current_lufs
+    normalized = _apply_gain(y, gain_db)
+    peak = np.max(np.abs(normalized)) + 1e-9
+    if peak > 1.0:
+        normalized /= peak
+    return normalized
+
+
+def denoise(
+    audio: np.ndarray,
+    sr: int,
+    preset: str = "conservative",
+    *,
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> np.ndarray:
+    if preset not in PRESETS:
+        raise ValueError(f"Unknown preset '{preset}'. Available: {list(PRESETS.keys())}")
+    settings = PRESETS[preset]
+
+    processed = _apply_gain(audio, settings.gain_db)
+    processed = _bandpass_filter(processed, sr, *settings.bandpass)
+
+    if use_multiband_gate:
+        processed = _multi_band_gate(processed, sr, settings.primary_gate)
+    else:
+        processed = _apply_spectral_gate(processed, sr, settings.primary_gate)
+
+    if use_wiener:
+        processed = _wiener_filter(processed)
+    if use_spectral_subtraction:
+        processed = _spectral_subtraction(processed, sr, settings.primary_gate)
+
+    processed = _apply_spectral_gate(processed, sr, settings.secondary_gate)
+    processed = _limiter(processed, settings.limiter_ceiling)
+    processed = _loudness_normalize(processed, settings.target_lufs)
+    return processed
+
+
+__all__ = [
+    "SpectralGateConfig",
+    "DenoisePreset",
+    "PRESETS",
+    "denoise",
+]