From 0447fd7d40219077a50583464a443b1bcf6a4341 Mon Sep 17 00:00:00 2001
From: "gecko.pico" <davior@users.noreply.github.com>
Date: Sun, 30 Nov 2025 10:54:11 +1100
Subject: [PATCH] Add batch denoising CLI and helpers

---
 README.md             |  25 +++++
 src/audio/__init__.py |  13 +++
 src/audio/denoise.py  | 209 ++++++++++++++++++++++++++++++++++++++++++
 src/audio/pipeline.py | 168 +++++++++++++++++++++++++++++++++
 4 files changed, 415 insertions(+)
 create mode 100644 src/audio/__init__.py
 create mode 100644 src/audio/denoise.py
 create mode 100644 src/audio/pipeline.py
diff --git a/README.md b/README.md
index bebcb32..45eb3f9 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,27 @@
 # V2kAudioProcessor
 Process audio to find voices in it. Used to process V2k recordings and find the audio within it.
+
+## Batch denoising CLI
+You can quickly run the spectral denoising pipeline over one or more audio files (or folders full of files) without writing code. The `audio` package lives under `src/`, so install the project or put `src` on `PYTHONPATH` first:
+
+```bash
+python -m audio.pipeline path/to/input.wav another_dir/ \
+  --output-dir denoised_outputs \
+  --preset aggressive \
+  --multiband-gate --wiener
+```
+
+- Accepts individual files and directories (searched recursively with `--glob`, default `*.wav`).
+- Mirrors each directory input's sub-folder structure inside the output directory (individually listed files go to its top level) and names every output `<stem>_denoised_<preset>.wav`.
+- Supports the same optional stages as the library API: multi-band gating, Wiener filtering, and spectral subtraction.
+
+The underlying functions are available from Python if you want to script your own evaluation against a set of test clips:
+
+```python
+from pathlib import Path
+from audio import process_batch
+
+outputs = process_batch([Path("tests/inputs")], Path("tests/outputs"), preset="conservative")
+for out in outputs:
+    print(out)
+```
diff --git a/src/audio/__init__.py b/src/audio/__init__.py
new file mode 100644
index 0000000..49a10f2
--- /dev/null
+++ b/src/audio/__init__.py
@@ -0,0 +1,13 @@
+"""Audio processing utilities and batch helpers."""
+
+from .denoise import DenoisePreset, PRESETS, SpectralGateConfig, denoise
+from .pipeline import process_audio_file, process_batch
+
+__all__ = [  # public API: the names re-exported above from .denoise and .pipeline
+    "SpectralGateConfig",
+    "DenoisePreset",
+    "PRESETS",
+    "denoise",
+    "process_audio_file",
+    "process_batch",
+]
diff --git a/src/audio/denoise.py b/src/audio/denoise.py
new file mode 100644
index 0000000..2ff9756
--- /dev/null
+++ b/src/audio/denoise.py
@@ -0,0 +1,209 @@
+from dataclasses import dataclass
+from typing import Dict, Optional, Tuple
+
+import librosa
+import numpy as np
+from scipy import signal
+
+
+@dataclass
+class SpectralGateConfig:  # STFT and gating parameters for one spectral-gate pass
+    threshold_db: float = -30.0  # dB scale applied to the per-bin noise estimate to form the gate threshold
+    reduction_db: float = -20.0  # dB gain applied to bins the gate treats as closed
+    min_energy_percentile: float = 0.2  # quantile of frame energy (passed to np.quantile); frames at or below it count as noise
+    attack: float = 0.01  # seconds; smoothing time used while the mask is rising
+    release: float = 0.08  # seconds; smoothing time used while the mask is falling
+    n_fft: int = 1024  # n_fft passed to librosa.stft
+    hop_length: int = 256  # hop in samples, passed to librosa.stft / librosa.istft
+
+
+@dataclass
+class DenoisePreset:  # one complete parameter set for the denoise() chain
+    gain_db: float  # input gain in dB, applied before any filtering
+    bandpass: Tuple[float, float]  # (low, high) corner frequencies handed to the band-pass filter
+    primary_gate: SpectralGateConfig  # settings for the first gate pass (also reused by spectral subtraction)
+    secondary_gate: SpectralGateConfig  # settings for the second gate pass, run just before the limiter
+    limiter_ceiling: float  # absolute sample level the limiter clips to (clamped to 0..1)
+    target_lufs: float  # level target in dB for the final normalisation step
+
+
+PRESETS: Dict[str, DenoisePreset] = {  # preset name -> settings; denoise() rejects names missing from this table
+    "conservative": DenoisePreset(
+        gain_db=0.0,
+        bandpass=(60.0, 18000.0),
+        primary_gate=SpectralGateConfig(
+            threshold_db=-28.0, reduction_db=-18.0, min_energy_percentile=0.25, attack=0.015, release=0.1
+        ),
+        secondary_gate=SpectralGateConfig(
+            threshold_db=-32.0, reduction_db=-22.0, min_energy_percentile=0.2, attack=0.01, release=0.08
+        ),
+        limiter_ceiling=0.98,
+        target_lufs=-16.0,
+    ),
+    "aggressive": DenoisePreset(
+        gain_db=3.0,  # the only preset here that boosts the input before filtering
+        bandpass=(80.0, 16000.0),
+        primary_gate=SpectralGateConfig(
+            threshold_db=-26.0, reduction_db=-14.0, min_energy_percentile=0.35, attack=0.02, release=0.12
+        ),
+        secondary_gate=SpectralGateConfig(
+            threshold_db=-30.0, reduction_db=-26.0, min_energy_percentile=0.3, attack=0.015, release=0.1
+        ),
+        limiter_ceiling=0.96,
+        target_lufs=-15.0,
+    ),
+}
+
+
+def _db_to_amplitude(db_value: float) -> float:
+    return pow(10.0, db_value / 20.0)  # dB -> linear amplitude ratio (20 dB per factor of ten)
+
+
+def _apply_gain(audio: np.ndarray, gain_db: float) -> np.ndarray:
+    if gain_db != 0:  # only a non-zero gain allocates a scaled copy
+        return audio * _db_to_amplitude(gain_db)
+    return audio  # unity gain hands back the very same array object
+
+
+def _bandpass_filter(audio: np.ndarray, sr: int, low: float, high: float, order: int = 4) -> np.ndarray:
+    nyquist = sr / 2.0
+    # Clamp the corners (low >= 1, high <= nyquist - 1), then express them as fractions of Nyquist for butter().
+    edges = [max(1.0, low) / nyquist, min(high, nyquist - 1.0) / nyquist]
+    sos = signal.butter(order, edges, btype="bandpass", output="sos")
+    return signal.sosfiltfilt(sos, audio)  # forward-backward filtering pass
+
+
+def _estimate_noise_profile(
+    y: np.ndarray, config: SpectralGateConfig, energy_percentile: Optional[float] = None
+) -> np.ndarray:
+    # Per-bin mean magnitude over the quietest frames; an explicit percentile overrides the config's.
+    quantile = config.min_energy_percentile if energy_percentile is None else energy_percentile
+    spectrum = np.abs(librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length))
+    per_frame = spectrum.mean(axis=0)
+    quiet = per_frame <= np.quantile(per_frame, quantile)
+    selected = spectrum[:, quiet]
+    if selected.size == 0:
+        # No frame qualified: fall back to averaging every frame.
+        selected = spectrum
+    return np.mean(selected, axis=1)
+
+
+def _smooth_mask(mask: np.ndarray, attack_frames: int, release_frames: int) -> np.ndarray:
+    # One-pole smoothing along time (axis 1) of a (freq, time) mask; all bins advance together per frame.
+    smoothed = np.zeros_like(mask, dtype=float)
+    state = np.zeros(mask.shape[0], dtype=float)  # every bin starts from 0.0
+    rise, fall = 1.0 / attack_frames, 1.0 / release_frames  # loop-invariant coefficients, hoisted
+    for t in range(mask.shape[1]):
+        target = mask[:, t]
+        state = state + np.where(target > state, rise, fall) * (target - state)  # attack when rising, else release
+        smoothed[:, t] = state
+    return smoothed
+
+
+def _apply_spectral_gate(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    # Attenuate STFT bins at or below a noise-derived threshold; the original phase is kept.
+    spectrum = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude, phase = np.abs(spectrum), np.angle(spectrum)
+
+    # Per-bin threshold: the estimated noise magnitude scaled by threshold_db.
+    floor = _estimate_noise_profile(y, config)[:, None] * _db_to_amplitude(config.threshold_db)
+    is_open = (magnitude > floor).astype(float)
+
+    # Attack/release are given in seconds; convert to whole STFT frames (never fewer than one).
+    seconds_per_hop = config.hop_length / float(sr)
+    attack_frames = max(1, int(config.attack / seconds_per_hop))
+    release_frames = max(1, int(config.release / seconds_per_hop))
+    openness = _smooth_mask(is_open, attack_frames, release_frames)
+
+    # openness 1 passes a bin unchanged; openness 0 scales it by the reduction_db gain.
+    closed_gain = _db_to_amplitude(config.reduction_db)
+    gated = magnitude * (openness + (1 - openness) * closed_gain)
+    return librosa.istft(gated * np.exp(1j * phase), hop_length=config.hop_length, length=len(y))
+
+
+def _spectral_subtraction(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    # Subtract the noise estimate (frames at or below the 0.4 energy quantile) per bin, floored at zero.
+    spectrum = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude, phase = np.abs(spectrum), np.angle(spectrum)
+    noise = _estimate_noise_profile(y, config, energy_percentile=0.4)[:, None]
+    cleaned = np.maximum(magnitude - noise, 0.0) * np.exp(1j * phase)
+    return librosa.istft(cleaned, hop_length=config.hop_length, length=len(y))
+
+
+def _wiener_filter(y: np.ndarray, size: int = 11) -> np.ndarray:
+    return np.nan_to_num(signal.wiener(y, mysize=size))  # all-zero input makes wiener() compute 0/0 -> NaN; map to 0
+
+
+def _multi_band_gate(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    # Gate three fixed bands separately, then average them; no band reaches above 8 kHz.
+    top = sr / 2.0 - 1.0  # keep every upper edge just under Nyquist
+    edges = [
+        (30.0, min(200.0, top)),
+        (200.0, min(2000.0, top)),
+        (2000.0, min(8000.0, top)),
+    ]
+    gated_bands = [
+        _apply_spectral_gate(_bandpass_filter(y, sr, lo, hi), sr, config)
+        for lo, hi in edges
+    ]
+    mix = np.sum(gated_bands, axis=0) / max(len(gated_bands), 1)
+    loudest = np.max(np.abs(mix)) + 1e-9
+    if loudest > 1.0:
+        mix /= loudest  # only rescale when the mix would exceed full scale
+    return mix
+
+
+def _limiter(y: np.ndarray, ceiling: float) -> np.ndarray:
+    bound = max(0.0, min(1.0, ceiling))  # force the ceiling into [0, 1] before hard-clipping
+    return np.clip(y, -bound, bound)
+
+
+def _loudness_normalize(y: np.ndarray, target_lufs: float) -> np.ndarray:
+    # Level is measured as plain RMS in dB (no loudness weighting is applied here), not true LUFS.
+    rms = np.sqrt(np.mean(np.square(y))) + 1e-9  # epsilon keeps log10 finite for silent input
+    gain_db = target_lufs - 20 * np.log10(rms)
+    normalized = _apply_gain(y, gain_db)
+    peak = np.max(np.abs(normalized)) + 1e-9
+    if peak > 1.0:
+        normalized = normalized / peak  # out-of-place: at 0 dB gain `normalized` is `y` itself, `/=` would mutate it
+    return normalized
+
+
+def denoise(
+    audio: np.ndarray,
+    sr: int,
+    preset: str = "conservative",
+    *,
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> np.ndarray:
+    """Run the chosen preset's denoising chain over ``audio`` and return the processed samples."""
+    settings = PRESETS.get(preset)
+    if settings is None:
+        raise ValueError(f"Unknown preset '{preset}'. Available: {list(PRESETS.keys())}")
+
+    # Input gain and band-pass come first; the first gate is either the three-band or the full-band one.
+    first_gate = _multi_band_gate if use_multiband_gate else _apply_spectral_gate
+    stage = _bandpass_filter(_apply_gain(audio, settings.gain_db), sr, *settings.bandpass)
+    stage = first_gate(stage, sr, settings.primary_gate)
+
+    # Optional stages run in this fixed order when enabled.
+    if use_wiener:
+        stage = _wiener_filter(stage)
+    if use_spectral_subtraction:
+        # Spectral subtraction borrows the primary gate's STFT settings.
+        stage = _spectral_subtraction(stage, sr, settings.primary_gate)
+
+    stage = _apply_spectral_gate(stage, sr, settings.secondary_gate)
+    # NOTE(review): levelling follows the clip, so the returned peak can exceed limiter_ceiling.
+    stage = _limiter(stage, settings.limiter_ceiling)
+    return _loudness_normalize(stage, settings.target_lufs)
+
+
+__all__ = [  # public names of this module; the underscore-prefixed stage helpers stay private
+    "SpectralGateConfig",
+    "DenoisePreset",
+    "PRESETS",
+    "denoise",
+]
diff --git a/src/audio/pipeline.py b/src/audio/pipeline.py
new file mode 100644
index 0000000..3fdc762
--- /dev/null
+++ b/src/audio/pipeline.py
@@ -0,0 +1,168 @@
+"""Utilities for batch-processing audio files with the denoising chain.
+
+This module provides a simple CLI so datasets or ad-hoc recordings can be
+processed reproducibly with consistent settings. It preserves the original
+sample rate, applies the configurable denoiser, and writes outputs into a
+user-defined directory while keeping relative structure.
+"""
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+from typing import List, Sequence
+
+import librosa
+import numpy as np
+import soundfile as sf
+
+from .denoise import PRESETS, denoise
+
+
+def _gather_audio_files(sources: Sequence[Path], glob: str) -> List[tuple[Path, Path]]:
+    """Collect ``(file_path, relative_parent)`` pairs for every input to process.
+
+    A directory source contributes each regular file matched by a recursive,
+    sorted ``rglob(glob)``, paired with that file's parent directory relative to
+    the source. A file source is paired with ``Path()``. Any other source
+    (for example a path that does not exist) contributes nothing.
+    """
+    collected: List[tuple[Path, Path]] = []
+    for source in sources:
+        if source.is_dir():
+            matches = (match for match in sorted(source.rglob(glob)) if match.is_file())
+            collected.extend((match, match.relative_to(source).parent) for match in matches)
+        elif source.is_file():
+            collected.append((source, Path()))
+    return collected
+
+
+def process_audio_file(
+    input_path: Path,
+    output_path: Path,
+    *,
+    preset: str = "conservative",
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> Path:
+    """Denoise one audio file and write the result to ``output_path``.
+
+    The input is loaded with ``sr=None, mono=True`` and the output is written
+    with the sample rate reported by the loader. Missing parent directories
+    of ``output_path`` are created first.
+
+    Parameters
+    ----------
+    input_path:
+        Audio file to read.
+    output_path:
+        File to write; returned unchanged once the write has finished.
+    preset:
+        Name of the denoising preset, forwarded to ``denoise``.
+    use_wiener, use_spectral_subtraction, use_multiband_gate:
+        Switches for the optional stages; each is forwarded to ``denoise``
+        exactly as given.
+    """
+    samples, sample_rate = librosa.load(input_path, sr=None, mono=True)
+    stage_flags = {
+        "use_wiener": use_wiener,
+        "use_spectral_subtraction": use_spectral_subtraction,
+        "use_multiband_gate": use_multiband_gate,
+    }
+    cleaned = denoise(np.asarray(samples), sample_rate, preset=preset, **stage_flags)
+    # Create the destination folder on demand so nested output layouts work.
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    sf.write(output_path, cleaned, sample_rate)
+    return output_path
+
+
+def process_batch(
+    inputs: Sequence[Path],
+    output_dir: Path,
+    *,
+    glob: str = "*.wav",
+    preset: str = "conservative",
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> List[Path]:
+    """Denoise every file gathered from ``inputs`` and return the written paths in processing order."""
+    # All files are gathered up front, before the first one is processed.
+    # Each result lands at ``output_dir / <relative parent> / <stem>_denoised_<preset>.wav``.
+    # NOTE(review): inputs sharing a stem and relative parent resolve to the same output path,
+    # so a later file would overwrite an earlier result -- confirm whether that is acceptable.
+    return [
+        process_audio_file(
+            source_file,
+            output_dir / relative_parent / f"{source_file.stem}_denoised_{preset}.wav",
+            preset=preset,
+            use_wiener=use_wiener,
+            use_spectral_subtraction=use_spectral_subtraction,
+            use_multiband_gate=use_multiband_gate,
+        )
+        for source_file, relative_parent in _gather_audio_files(inputs, glob)
+    ]
+
+
+def _parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:  # argv=None: argparse reads sys.argv[1:]
+    parser = argparse.ArgumentParser(description="Batch denoise audio files with presets.")
+    parser.add_argument(
+        "inputs",
+        nargs="+",
+        type=Path,
+        help="One or more audio files or directories containing audio files.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        type=Path,
+        default=Path("denoised"),
+        help="Where processed files should be written (missing directories are created).",
+    )
+    parser.add_argument(
+        "--glob",
+        type=str,
+        default="*.wav",
+        help="Glob pattern to search for audio when inputs include directories.",
+    )
+    parser.add_argument(
+        "--preset",
+        choices=list(PRESETS),  # derived from the preset table so new presets reach the CLI without edits here
+        default="conservative",
+        help="Which denoising preset to use.",
+    )
+    parser.add_argument(
+        "--wiener",
+        action="store_true",
+        help="Enable an additional Wiener filtering stage between gates.",
+    )
+    parser.add_argument(
+        "--spectral-subtraction",
+        action="store_true",
+        help="Run spectral subtraction between the two gating passes.",
+    )
+    parser.add_argument(
+        "--multiband-gate",
+        action="store_true",
+        help="Use the multi-band gate on the first pass (low/mid/high split).",
+    )
+    return parser.parse_args(argv)
+
+
+def main(argv: Sequence[str] | None = None) -> None:
+    """CLI entry point: denoise the requested inputs and print each written path."""
+    options = _parse_args(argv)
+    for written_path in process_batch(
+        options.inputs,
+        options.output_dir,
+        glob=options.glob,
+        preset=options.preset,
+        use_wiener=options.wiener,
+        use_spectral_subtraction=options.spectral_subtraction,
+        use_multiband_gate=options.multiband_gate,
+    ):
+        print(written_path)
+
+
+if __name__ == "__main__":  # run the CLI only when this module is executed, not when it is imported
+    main()