From 0447fd7d40219077a50583464a443b1bcf6a4341 Mon Sep 17 00:00:00 2001
From: "gecko.pico"
Date: Sun, 30 Nov 2025 10:54:11 +1100
Subject: [PATCH] Add batch denoising CLI and helpers

---
 README.md             |  25 ++++
 src/audio/__init__.py |  13 ++
 src/audio/denoise.py  | 286 ++++++++++++++++++++++++++++++++++++++++++
 src/audio/pipeline.py | 182 +++++++++++++++++++++++++++
 4 files changed, 506 insertions(+)
 create mode 100644 src/audio/__init__.py
 create mode 100644 src/audio/denoise.py
 create mode 100644 src/audio/pipeline.py

diff --git a/README.md b/README.md
index bebcb32..45eb3f9 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,27 @@
 # V2kAudioProcessor
 Process audio to find voices in it. Used to process V2k recordings and find the audio within it.
+
+## Batch denoising CLI
+You can quickly run the spectral denoising pipeline over one or more audio files (or folders full of files) without writing code:
+
+```bash
+python -m audio.pipeline path/to/input.wav another_dir/ \
+  --output-dir denoised_outputs \
+  --preset aggressive \
+  --multiband-gate --wiener
+```
+
+- Accepts individual files and directories (searched recursively with `--glob`, default `*.wav`).
+- Preserves relative paths inside the output directory and appends a `_denoised_<preset>.wav` suffix.
+- Supports the same optional stages as the library API: multi-band gating, Wiener filtering, and spectral subtraction.
+
+The underlying functions are available from Python if you want to script your own evaluation against a set of test clips:
+
+```python
+from pathlib import Path
+from audio import process_batch
+
+outputs = process_batch([Path("tests/inputs")], Path("tests/outputs"), preset="conservative")
+for out in outputs:
+    print(out)
+```
diff --git a/src/audio/__init__.py b/src/audio/__init__.py
new file mode 100644
index 0000000..49a10f2
--- /dev/null
+++ b/src/audio/__init__.py
@@ -0,0 +1,13 @@
+"""Audio processing utilities and batch helpers."""
+
+from .denoise import DenoisePreset, PRESETS, SpectralGateConfig, denoise
+from .pipeline import process_audio_file, process_batch
+
+__all__ = [
+    "SpectralGateConfig",
+    "DenoisePreset",
+    "PRESETS",
+    "denoise",
+    "process_audio_file",
+    "process_batch",
+]
diff --git a/src/audio/denoise.py b/src/audio/denoise.py
new file mode 100644
index 0000000..2ff9756
--- /dev/null
+++ b/src/audio/denoise.py
@@ -0,0 +1,286 @@
+"""Spectral-gate based denoising chain and its presets."""
+
+from dataclasses import dataclass
+from typing import Dict, Optional, Tuple
+
+import librosa
+import numpy as np
+from scipy import signal
+
+
+@dataclass
+class SpectralGateConfig:
+    """Parameters for one spectral gating pass."""
+
+    threshold_db: float = -30.0  # gate threshold, in dB relative to the noise profile
+    reduction_db: float = -20.0  # gain applied to bins below the threshold
+    min_energy_percentile: float = 0.2  # quantile (0-1) of frames treated as noise
+    attack: float = 0.01  # seconds
+    release: float = 0.08  # seconds
+    n_fft: int = 1024
+    hop_length: int = 256
+
+
+@dataclass
+class DenoisePreset:
+    """Settings for the full chain run by :func:`denoise`."""
+
+    gain_db: float  # input gain applied before filtering
+    bandpass: Tuple[float, float]  # (low, high) cut-offs in Hz
+    primary_gate: SpectralGateConfig
+    secondary_gate: SpectralGateConfig
+    limiter_ceiling: float  # hard-clip level, linear amplitude (0-1)
+    target_lufs: float  # target level; measured as RMS dBFS, not true LUFS
+
+
+PRESETS: Dict[str, DenoisePreset] = {
+    "conservative": DenoisePreset(
+        gain_db=0.0,
+        bandpass=(60.0, 18000.0),
+        primary_gate=SpectralGateConfig(
+            threshold_db=-28.0, reduction_db=-18.0, min_energy_percentile=0.25, attack=0.015, release=0.1
+        ),
+        secondary_gate=SpectralGateConfig(
+            threshold_db=-32.0, reduction_db=-22.0, min_energy_percentile=0.2, attack=0.01, release=0.08
+        ),
+        limiter_ceiling=0.98,
+        target_lufs=-16.0,
+    ),
+    "aggressive": DenoisePreset(
+        gain_db=3.0,
+        bandpass=(80.0, 16000.0),
+        primary_gate=SpectralGateConfig(
+            threshold_db=-26.0, reduction_db=-14.0, min_energy_percentile=0.35, attack=0.02, release=0.12
+        ),
+        secondary_gate=SpectralGateConfig(
+            threshold_db=-30.0, reduction_db=-26.0, min_energy_percentile=0.3, attack=0.015, release=0.1
+        ),
+        limiter_ceiling=0.96,
+        target_lufs=-15.0,
+    ),
+}
+
+
+def _db_to_amplitude(db_value: float) -> float:
+    """Convert a decibel value to a linear amplitude factor."""
+    return 10 ** (db_value / 20.0)
+
+
+def _apply_gain(audio: np.ndarray, gain_db: float) -> np.ndarray:
+    """Scale *audio* by *gain_db*; a gain of exactly 0 returns the input object itself."""
+    if gain_db == 0:
+        return audio
+    return audio * _db_to_amplitude(gain_db)
+
+
+def _bandpass_filter(audio: np.ndarray, sr: int, low: float, high: float, order: int = 4) -> np.ndarray:
+    """Zero-phase Butterworth band-pass between *low* and *high* Hz.
+
+    The edges are clamped to ``[1 Hz, Nyquist - 1 Hz]``.
+
+    Raises:
+        ValueError: if the clamped band is empty (``low >= high``), e.g. when
+            the requested band lies above the Nyquist frequency.
+    """
+    nyquist = sr / 2.0
+    low = max(1.0, low)
+    high = min(high, nyquist - 1.0)
+    if low >= high:
+        raise ValueError(f"Empty band {low}-{high} Hz for sample rate {sr}")
+    sos = signal.butter(order, [low / nyquist, high / nyquist], btype="bandpass", output="sos")
+    return signal.sosfiltfilt(sos, audio)
+
+
+def _estimate_noise_profile(
+    y: np.ndarray, config: SpectralGateConfig, energy_percentile: Optional[float] = None
+) -> np.ndarray:
+    """Return the mean magnitude spectrum of the quietest frames of *y*.
+
+    Frames whose mean magnitude is at or below the *energy_percentile*
+    quantile (default: ``config.min_energy_percentile``) are treated as noise.
+    """
+    percentile = energy_percentile if energy_percentile is not None else config.min_energy_percentile
+    S = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude = np.abs(S)
+    frame_energy = magnitude.mean(axis=0)
+    cutoff = np.quantile(frame_energy, percentile)
+    noise_frames = magnitude[:, frame_energy <= cutoff]
+    if noise_frames.size == 0:
+        noise_frames = magnitude
+    return np.mean(noise_frames, axis=1)
+
+
+def _smooth_mask(mask: np.ndarray, attack_frames: int, release_frames: int) -> np.ndarray:
+    """Smooth a (freq, time) mask along time with attack/release ballistics.
+
+    Each bin moves toward its target by ``1 / attack_frames`` of the gap when
+    rising and ``1 / release_frames`` when falling. All frequency bins are
+    updated together, so Python only loops over time frames.
+    """
+    smoothed = np.zeros_like(mask, dtype=float)
+    attack_coeff = 1.0 / attack_frames
+    release_coeff = 1.0 / release_frames
+    state = np.zeros(mask.shape[0], dtype=float)
+    for t in range(mask.shape[1]):
+        target = mask[:, t]
+        coeff = np.where(target > state, attack_coeff, release_coeff)
+        state = state + coeff * (target - state)
+        smoothed[:, t] = state
+    return smoothed
+
+
+def _apply_spectral_gate(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    """Attenuate time-frequency bins of *y* that fall below the noise-relative threshold.
+
+    Bins above the threshold pass unchanged; the rest are scaled by
+    ``config.reduction_db``, with the open/closed decision smoothed over time.
+    """
+    S = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude = np.abs(S)
+    phase = np.angle(S)
+
+    noise_profile = _estimate_noise_profile(y, config)
+    # NOTE(review): the presets use negative threshold_db values, which put the
+    # threshold below the estimated noise floor - confirm that is intended.
+    threshold = noise_profile[:, None] * _db_to_amplitude(config.threshold_db)
+
+    mask = magnitude > threshold
+    reduction = _db_to_amplitude(config.reduction_db)
+
+    hop_duration = config.hop_length / float(sr)
+    attack_frames = max(1, int(config.attack / hop_duration))
+    release_frames = max(1, int(config.release / hop_duration))
+    smoothed_mask = _smooth_mask(mask.astype(float), attack_frames, release_frames)
+
+    gated_magnitude = magnitude * (smoothed_mask + (1 - smoothed_mask) * reduction)
+    Y = gated_magnitude * np.exp(1j * phase)
+    return librosa.istft(Y, hop_length=config.hop_length, length=len(y))
+
+
+def _spectral_subtraction(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    """Subtract the estimated noise magnitude from every frame, flooring at zero."""
+    S = librosa.stft(y, n_fft=config.n_fft, hop_length=config.hop_length)
+    magnitude = np.abs(S)
+    phase = np.angle(S)
+    noise_profile = _estimate_noise_profile(y, config, energy_percentile=0.4)
+    adjusted = np.maximum(magnitude - noise_profile[:, None], 0.0)
+    return librosa.istft(adjusted * np.exp(1j * phase), hop_length=config.hop_length, length=len(y))
+
+
+def _wiener_filter(y: np.ndarray, size: int = 11) -> np.ndarray:
+    """Apply ``scipy.signal.wiener`` with a window of *size* samples."""
+    return signal.wiener(y, mysize=size)
+
+
+def _multi_band_gate(y: np.ndarray, sr: int, config: SpectralGateConfig) -> np.ndarray:
+    """Gate three bands (30-200, 200-2000, 2000-8000 Hz) separately and average them.
+
+    Bands that are empty at this sample rate (lower edge at or above the
+    clamped upper edge) are skipped instead of failing filter design. If no
+    band is usable, *y* is returned unchanged.
+    """
+    nyquist = sr / 2.0
+    bands = [
+        (30.0, min(200.0, nyquist - 1.0)),
+        (200.0, min(2000.0, nyquist - 1.0)),
+        (2000.0, min(8000.0, nyquist - 1.0)),
+    ]
+    band_signals = [
+        _apply_spectral_gate(_bandpass_filter(y, sr, low, high), sr, config)
+        for low, high in bands
+        if low < high
+    ]
+    if not band_signals:
+        return y
+    combined = np.sum(band_signals, axis=0) / len(band_signals)
+    peak = np.max(np.abs(combined)) + 1e-9
+    if peak > 1.0:
+        combined /= peak
+    return combined
+
+
+def _limiter(y: np.ndarray, ceiling: float) -> np.ndarray:
+    """Hard-clip *y* to ``[-ceiling, ceiling]``, with *ceiling* clamped to 0-1."""
+    ceiling = max(0.0, min(1.0, ceiling))
+    return np.clip(y, -ceiling, ceiling)
+
+
+def _loudness_normalize(y: np.ndarray, target_lufs: float) -> np.ndarray:
+    """Scale *y* so its RMS level (dBFS) matches *target_lufs*, keeping peaks <= 1.
+
+    This is an RMS approximation, not an ITU-R BS.1770 loudness measurement.
+    The input array is never modified.
+    """
+    rms = np.sqrt(np.mean(np.square(y))) + 1e-9
+    current_lufs = 20 * np.log10(rms)
+    gain_db = target_lufs - current_lufs
+    normalized = _apply_gain(y, gain_db)
+    peak = np.max(np.abs(normalized)) + 1e-9
+    if peak > 1.0:
+        # Not in place: _apply_gain returns *y* itself when the gain is exactly 0.
+        normalized = normalized / peak
+    return normalized
+
+
+def denoise(
+    audio: np.ndarray,
+    sr: int,
+    preset: str = "conservative",
+    *,
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> np.ndarray:
+    """Run the denoising chain on an audio signal.
+
+    Stages: gain, band-pass, primary gate (full-band or multi-band), optional
+    Wiener filter, optional spectral subtraction, secondary gate, limiter,
+    level normalization.
+
+    Args:
+        audio: samples to process.
+        sr: sample rate in Hz.
+        preset: key into ``PRESETS``.
+        use_wiener: add a Wiener filtering pass after the primary gate.
+        use_spectral_subtraction: add spectral subtraction before the secondary gate.
+        use_multiband_gate: use the multi-band gate for the primary pass.
+
+    Returns:
+        The processed signal.
+
+    Raises:
+        ValueError: if *preset* is unknown or *audio* is empty.
+    """
+    if preset not in PRESETS:
+        raise ValueError(f"Unknown preset '{preset}'. Available: {list(PRESETS.keys())}")
+    if np.size(audio) == 0:
+        raise ValueError("Cannot denoise empty audio")
+    settings = PRESETS[preset]
+
+    processed = _apply_gain(audio, settings.gain_db)
+    processed = _bandpass_filter(processed, sr, *settings.bandpass)
+
+    if use_multiband_gate:
+        processed = _multi_band_gate(processed, sr, settings.primary_gate)
+    else:
+        processed = _apply_spectral_gate(processed, sr, settings.primary_gate)
+
+    if use_wiener:
+        processed = _wiener_filter(processed)
+    if use_spectral_subtraction:
+        processed = _spectral_subtraction(processed, sr, settings.primary_gate)
+
+    processed = _apply_spectral_gate(processed, sr, settings.secondary_gate)
+    processed = _limiter(processed, settings.limiter_ceiling)
+    # NOTE(review): normalization runs after the limiter, so peaks can end up
+    # above limiter_ceiling (they are only capped at 1.0).
+    processed = _loudness_normalize(processed, settings.target_lufs)
+    return processed
+
+
+__all__ = [
+    "SpectralGateConfig",
+    "DenoisePreset",
+    "PRESETS",
+    "denoise",
+]
diff --git a/src/audio/pipeline.py b/src/audio/pipeline.py
new file mode 100644
index 0000000..3fdc762
--- /dev/null
+++ b/src/audio/pipeline.py
@@ -0,0 +1,182 @@
+"""Utilities for batch-processing audio files with the denoising chain.
+
+This module provides a simple CLI so datasets or ad-hoc recordings can be
+processed reproducibly with consistent settings. It preserves the original
+sample rate, applies the configurable denoiser, and writes outputs into a
+user-defined directory while keeping relative structure.
+"""
+from __future__ import annotations
+
+import argparse
+import warnings
+from pathlib import Path
+from typing import List, Sequence
+
+import librosa
+import numpy as np
+import soundfile as sf
+
+from .denoise import PRESETS, denoise
+
+
+def _gather_audio_files(sources: Sequence[Path], glob: str) -> List[tuple[Path, Path]]:
+    """Return a list of (file_path, relative_parent) pairs to process.
+
+    When a directory is provided, files are gathered recursively using the
+    supplied glob pattern and the relative_parent is the path relative to the
+    provided directory. For individual files, the relative_parent is ``Path("")``.
+    Sources that are neither a file nor a directory are skipped with a warning.
+    """
+    results: List[tuple[Path, Path]] = []
+    for source in sources:
+        if source.is_dir():
+            for file_path in sorted(source.rglob(glob)):
+                if file_path.is_file():
+                    results.append((file_path, file_path.relative_to(source).parent))
+        elif source.is_file():
+            results.append((source, Path()))
+        else:
+            # Surface mistyped paths instead of silently producing no output.
+            warnings.warn(f"Skipping input that does not exist: {source}", stacklevel=2)
+    return results
+
+
+def process_audio_file(
+    input_path: Path,
+    output_path: Path,
+    *,
+    preset: str = "conservative",
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> Path:
+    """Run the denoising chain on a single file and save the output.
+
+    The file is loaded at its native sample rate and downmixed to mono.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the audio file to process.
+    output_path:
+        Destination for the processed audio.
+    preset:
+        Denoising preset name (e.g., ``"conservative"`` or ``"aggressive"``).
+    use_wiener:
+        Enable an additional Wiener filtering pass.
+    use_spectral_subtraction:
+        Apply spectral subtraction between the two gating passes.
+    use_multiband_gate:
+        Use a multi-band gate instead of the full-band gate on the first pass.
+    """
+    y, sr = librosa.load(input_path, sr=None, mono=True)
+    processed = denoise(
+        np.asarray(y),
+        sr,
+        preset=preset,
+        use_wiener=use_wiener,
+        use_spectral_subtraction=use_spectral_subtraction,
+        use_multiband_gate=use_multiband_gate,
+    )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    sf.write(output_path, processed, sr)
+    return output_path
+
+
+def process_batch(
+    inputs: Sequence[Path],
+    output_dir: Path,
+    *,
+    glob: str = "*.wav",
+    preset: str = "conservative",
+    use_wiener: bool = False,
+    use_spectral_subtraction: bool = False,
+    use_multiband_gate: bool = False,
+) -> List[Path]:
+    """Process many files and return the list of written output paths.
+
+    Each output is written to ``output_dir / relative_parent`` and named
+    ``<input stem>_denoised_<preset>.wav``. Inputs that map to the same output
+    path overwrite one another.
+    """
+    audio_files = _gather_audio_files(inputs, glob)
+    outputs: List[Path] = []
+    for input_path, relative_parent in audio_files:
+        output_subdir = output_dir / relative_parent
+        output_path = output_subdir / f"{input_path.stem}_denoised_{preset}.wav"
+        processed_path = process_audio_file(
+            input_path,
+            output_path,
+            preset=preset,
+            use_wiener=use_wiener,
+            use_spectral_subtraction=use_spectral_subtraction,
+            use_multiband_gate=use_multiband_gate,
+        )
+        outputs.append(processed_path)
+    return outputs
+
+
+def _parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
+    """Parse CLI arguments; ``argv=None`` makes argparse read ``sys.argv``."""
+    parser = argparse.ArgumentParser(description="Batch denoise audio files with presets.")
+    parser.add_argument(
+        "inputs",
+        nargs="+",
+        type=Path,
+        help="One or more audio files or directories containing audio files.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        type=Path,
+        default=Path("denoised"),
+        help="Where processed files should be written (directories are recreated).",
+    )
+    parser.add_argument(
+        "--glob",
+        type=str,
+        default="*.wav",
+        help="Glob pattern to search for audio when inputs include directories.",
+    )
+    parser.add_argument(
+        "--preset",
+        choices=sorted(PRESETS),  # stay in sync with the presets denoise() accepts
+        default="conservative",
+        help="Which denoising preset to use.",
+    )
+    parser.add_argument(
+        "--wiener",
+        action="store_true",
+        help="Enable an additional Wiener filtering stage between gates.",
+    )
+    parser.add_argument(
+        "--spectral-subtraction",
+        action="store_true",
+        help="Run spectral subtraction between the two gating passes.",
+    )
+    parser.add_argument(
+        "--multiband-gate",
+        action="store_true",
+        help="Use the multi-band gate on the first pass (low/mid/high split).",
+    )
+    return parser.parse_args(argv)
+
+
+def main(argv: Sequence[str] | None = None) -> None:
+    """CLI entry point: denoise every input and print each output path."""
+    args = _parse_args(argv)
+    outputs = process_batch(
+        args.inputs,
+        args.output_dir,
+        glob=args.glob,
+        preset=args.preset,
+        use_wiener=args.wiener,
+        use_spectral_subtraction=args.spectral_subtraction,
+        use_multiband_gate=args.multiband_gate,
+    )
+    for path in outputs:
+        print(path)
+
+
+if __name__ == "__main__":
+    main()