#!/usr/bin/env bash
set -euo pipefail

# clean_audio.sh — Fully automatic audio cleaner for speech (ASR-friendly)
#
# - Default preset is tuned for ASR (faster-whisper):
#     mono, 16 kHz, high-pass filter, RNNoise denoise (arnndn), peak limiting
#     to about -0.5 dBFS. No aggressive gating/compression by default.
# - RNNoise is REQUIRED by default: the script exits if the arnndn filter or a
#   model file is unavailable. Pass --allow-fallback to permit the FFT-based
#   afftdn denoiser instead.
# - Optional "podcast" preset adds gentle dynamics and loudness leveling.
# - Accepts single files or directories (recursively).
# - Optional parallel processing.
#
# Dependencies: ffmpeg (arnndn filter recommended for best results)
# Optional:     an RNNoise model file for arnndn (auto-discovered if present)
#
# Environment:
#   FFMPEG_BIN     Path/name of the ffmpeg binary to use (overrides discovery).
#   RNNOISE_MODEL  Path to an RNNoise model used when -m is not given.
#
# Exit codes:
#   1  usage / input errors, or at least one file failed in parallel mode
#   8  afftdn lacks 'md' and --no-advanced was not given
#   9  arnndn filter unavailable (and fallback not allowed)
#   10 RNNoise model not found
#   11 RNNoise required but disabled (--no-ml without --allow-fallback)
#
# Usage examples:
#   Bash/clean_audio.sh input.wav                    # -> input_clean.wav (same folder)
#   Bash/clean_audio.sh input.wav -O out_dir         # -> out_dir/input_clean.wav
#   Bash/clean_audio.sh input_dir -O cleaned/ -j 4   # -> processes all audio files in dir
#   Bash/clean_audio.sh input.wav -m models/rn.nn    # -> use RNNoise model
#   Bash/clean_audio.sh input.wav --preset podcast   # -> add dynamics leveler
#

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# Print the command-line help text to stdout.
print_usage() {
  cat <<EOF
Usage: $(basename -- "$0") <input_file_or_dir> [options]

Options:
  -O, --out-dir DIR       Output directory (default: alongside input file).
  -e, --ext EXT           Output extension/container: wav|flac (default: wav).
  -m, --model PATH        RNNoise model file for arnndn; required by default unless --allow-fallback.
      --no-ml             Do not use arnndn even if model is provided (requires --allow-fallback).
      --allow-fallback    Permit falling back to afftdn when RNNoise cannot be used.
      --no-advanced, --compat
                          Avoid advanced filter options that some ffmpeg builds lack.
      --preset NAME       asr (default) | podcast | aggressive
  -j, --jobs N            Parallel jobs for directory mode (default: 1).
  -f, --force             Overwrite outputs if they exist (ffmpeg -y).
  -q, --quiet             Reduce ffmpeg logging noise.
      --lowpass FREQ      Optional low-pass cutoff (e.g., 8000). Disabled by default.
      --suffix SUF        Suffix for output basename (default: _clean).
  -h, --help              Show this help.

Notes:
  - Default sample rate is 16 kHz mono PCM 16-bit (good for most speech ASR models).
  - If arnndn (RNNoise) is used, it usually outperforms afftdn for speech denoise.
  - The 'podcast' preset adds gentle dynamics and loudness normalization (single-pass).
EOF
}

# Exit with status 1 unless command $1 can be found in PATH.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || {
    echo "Error: Required command '$1' not found in PATH" >&2
    exit 1
  }
}

# Exit with status 1 unless the option in $1 is followed by a value ($2).
# Call as: need_value "$@" from inside the option-parsing loop. Without this,
# 'set -u' would abort with an opaque "unbound variable" message.
need_value() {
  if [[ $# -lt 2 ]]; then
    echo "Error: Option '$1' requires an argument" >&2
    exit 1
  fi
}

# Defaults
OUT_DIR=""
OUT_EXT="wav"
RN_MODEL=""
NO_ML=false
REQUIRE_ML=true   # default: require RNNoise; fail fast if unavailable
PRESET="asr"
JOBS=1
FORCE=false
QUIET=false
LOWPASS=""
SUFFIX="_clean"
HIGHPASS="80"
AFFTDN_NF="-25"   # noise floor in dB for afftdn
AFFTDN_MD="8"     # mode for afftdn; requires builds that support 'md'
NO_ADVANCED=false # when true, avoid advanced options that some ffmpeg builds lack
AUDIO_FILTERS=""  # cached -af chain; filled in once by main()

# Parse args
if [[ $# -lt 1 ]]; then
  print_usage
  exit 1
fi

# Honour a help request even when no input path was supplied.
case "$1" in
  -h | --help)
    print_usage
    exit 0
    ;;
esac

INPUT_PATH="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    -O | --out-dir)
      need_value "$@"
      OUT_DIR="$2"
      shift 2
      ;;
    -e | --ext)
      need_value "$@"
      OUT_EXT="$2"
      shift 2
      ;;
    -m | --model)
      need_value "$@"
      RN_MODEL="$2"
      shift 2
      ;;
    --no-ml)
      NO_ML=true
      shift
      ;;
    --preset)
      need_value "$@"
      PRESET="$2"
      shift 2
      ;;
    -j | --jobs)
      need_value "$@"
      JOBS="$2"
      shift 2
      ;;
    -f | --force)
      FORCE=true
      shift
      ;;
    -q | --quiet)
      QUIET=true
      shift
      ;;
    --lowpass)
      need_value "$@"
      LOWPASS="$2"
      shift 2
      ;;
    --suffix)
      need_value "$@"
      SUFFIX="$2"
      shift 2
      ;;
    --no-advanced | --compat)
      NO_ADVANCED=true
      shift
      ;;
    --allow-fallback)
      REQUIRE_ML=false
      shift
      ;;
    -h | --help)
      print_usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      print_usage
      exit 1
      ;;
  esac
done

# JOBS is used in arithmetic contexts below; reject non-integers early and
# force base 10 so values such as "08" are not parsed as invalid octal.
if ! [[ "$JOBS" =~ ^[0-9]+$ ]]; then
  echo "Error: --jobs expects a non-negative integer, got '$JOBS'" >&2
  exit 1
fi
JOBS=$((10#$JOBS))

# Resolve FFmpeg binary (env override -> local build -> system).
# The PATH requirement is only enforced when we actually fall back to the
# system binary, so an override or local build works without a system ffmpeg.
FFMPEG_BIN=${FFMPEG_BIN:-}
if [[ -z "$FFMPEG_BIN" ]]; then
  if [[ -x "$SCRIPT_DIR/ffmpeg-build/FFmpeg/ffmpeg" ]]; then
    FFMPEG_BIN="$SCRIPT_DIR/ffmpeg-build/FFmpeg/ffmpeg"
  else
    require_cmd ffmpeg
    FFMPEG_BIN="ffmpeg"
  fi
fi
if ! command -v "$FFMPEG_BIN" >/dev/null 2>&1 && [[ ! -x "$FFMPEG_BIN" ]]; then
  echo "Error: FFmpeg binary not found: $FFMPEG_BIN" >&2
  exit 1
fi
if ! $QUIET; then
  echo "Using FFmpeg binary: $FFMPEG_BIN" >&2
fi

FFMPEG_LOG=(-hide_banner)
if $QUIET; then
  FFMPEG_LOG+=(-loglevel error)
else
  FFMPEG_LOG+=(-loglevel info)
fi

FFMPEG_OVERWRITE=(-n)
if $FORCE; then
  FFMPEG_OVERWRITE=(-y)
fi

# Detect the arnndn filter from the '-filters' listing.
# The output is captured first instead of piping into 'grep -q': under
# 'pipefail' an early grep exit can make the producer die of SIGPIPE and turn
# a successful match into a failed pipeline.
# NOTE(review): 'ffmpeg -h filter=NAME' appears to exit 0 even for unknown
# filters, so its exit status is not used as a signal here — confirm.
ffmpeg_filter_list=$("$FFMPEG_BIN" -hide_banner -filters 2>/dev/null || true)
arnndn_available=false
if grep -Eq '(^|[[:space:]])arnndn([[:space:]]|$)' <<<"$ffmpeg_filter_list"; then
  arnndn_available=true
fi
if ! $QUIET; then
  echo "arnndn_available=$arnndn_available" >&2
fi

# Check if afftdn supports 'md' option
afftdn_help=$("$FFMPEG_BIN" -hide_banner -h filter=afftdn 2>/dev/null || true)
afftdn_supports_md=false
if grep -q " md=" <<<"$afftdn_help"; then
  afftdn_supports_md=true
fi

# Try to auto-discover an RNNoise model if none provided.
# Outputs: path of the first model found, on stdout.
# Returns: 0 if a model was found, 1 otherwise.
find_default_rn_model() {
  # Allow env variable override
  if [[ -n "${RNNOISE_MODEL:-}" && -f "${RNNOISE_MODEL}" ]]; then
    echo "${RNNOISE_MODEL}"
    return 0
  fi

  local search_dirs=(
    "$SCRIPT_DIR/models"
    "$SCRIPT_DIR/../models"
    "/usr/share/rnnoise"
    "/usr/local/share/rnnoise"
    "/usr/share/ffmpeg/models"
    "$HOME/.local/share/rnnoise"
  )
  # Prefer '.rnnn' models (rnnoise-nu style) over legacy '.nn'
  local exts=("rnnn" "nn" "model")
  local d ext f

  for d in "${search_dirs[@]}"; do
    [[ -d "$d" ]] || continue
    for ext in "${exts[@]}"; do
      # Pick the first matching model file; an unmatched glob stays literal
      # and is rejected by the -f test.
      for f in "$d"/*."$ext"; do
        if [[ -f "$f" ]]; then
          echo "$f"
          return 0
        fi
      done
    done
  done
  return 1
}

use_arnndn=false
if [[ $NO_ML == false ]]; then
  if [[ $arnndn_available == false ]]; then
    if $REQUIRE_ML; then
      echo "Error: FFmpeg 'arnndn' filter not available. Please install/upgrade FFmpeg with librnnoise (see Bash/install_ffmpeg_with_arnndn.sh)." >&2
      exit 9
    fi
  else
    # arnndn available; require an external model.
    # A user-supplied path that does not exist must not survive into the
    # filter chain, so drop it and fall through to auto-discovery.
    if [[ -n "$RN_MODEL" && ! -f "$RN_MODEL" ]]; then
      echo "Warning: RNNoise model not found: $RN_MODEL (trying auto-discovery)" >&2
      RN_MODEL=""
    fi
    if [[ -z "$RN_MODEL" ]]; then
      if model_path=$(find_default_rn_model); then
        RN_MODEL="$model_path"
      elif [[ -x "$SCRIPT_DIR/get_rnnoise_model.sh" ]]; then
        # Best-effort download; failure is handled by the check below.
        RN_TARGET_DIR="$SCRIPT_DIR/models" \
          RN_TARGET_NAME="rnnoise_model.rnnn" \
          "$SCRIPT_DIR/get_rnnoise_model.sh" --yes || true
        if model_path=$(find_default_rn_model); then
          RN_MODEL="$model_path"
        fi
      fi
    fi
    if [[ -z "$RN_MODEL" ]]; then
      echo "Error: RNNoise model required but not found. Automatic download failed." >&2
      echo "Hint: Set RN_URL to a reachable model URL and run Bash/get_rnnoise_model.sh, or supply -m /path/to/model.nn." >&2
      exit 10
    fi
    use_arnndn=true
    echo "Using RNNoise external model: $RN_MODEL" >&2
  fi
fi

# Build the comma-separated ffmpeg -af filter chain from the global settings.
# Globals:  HIGHPASS, LOWPASS, PRESET, RN_MODEL, use_arnndn, REQUIRE_ML,
#           NO_ADVANCED, afftdn_supports_md, AFFTDN_NF, AFFTDN_MD (all read)
# Outputs:  the filter chain on stdout
# Exits:    11 if RNNoise is required but not in use; 8 if afftdn lacks 'md'
#           and --no-advanced was not given.
build_filters() {
  local filters=()

  # Remove low-frequency rumble typical for handheld/room noise
  filters+=("highpass=f=${HIGHPASS}")

  # Denoise
  if $use_arnndn; then
    # Resample to 48 kHz ahead of arnndn (presumably the rate RNNoise models
    # expect — confirm); mix=1.0 keeps the fully denoised signal.
    # NOTE(review): RN_MODEL is inserted unescaped; a path containing ':' or
    # ',' would need ffmpeg filtergraph escaping.
    filters+=("aresample=48000")
    filters+=("arnndn=m=${RN_MODEL}:mix=1.0")
  else
    # afftdn: FFT-based denoise, tune nf (noise floor) as needed
    if $REQUIRE_ML; then
      echo "Error: RNNoise required but not in use; aborting rather than falling back to afftdn. Use --allow-fallback to permit." >&2
      exit 11
    fi
    if $NO_ADVANCED; then
      filters+=("afftdn=nf=${AFFTDN_NF}")
    elif $afftdn_supports_md; then
      filters+=("afftdn=nf=${AFFTDN_NF}:md=${AFFTDN_MD}")
    else
      echo "Error: Your ffmpeg's afftdn filter does not support 'md='." >&2
      echo "Hint: Install/upgrade ffmpeg to a build that supports afftdn md or rerun with --no-advanced." >&2
      echo "      On Debian/Ubuntu you may need a newer ffmpeg from a PPA or build from source." >&2
      exit 8
    fi
  fi

  # Optional low-pass to shave hiss; keep disabled unless requested
  if [[ -n "$LOWPASS" ]]; then
    filters+=("lowpass=f=${LOWPASS}")
  fi

  case "$PRESET" in
    asr)
      # ASR-friendly: avoid heavy gating/leveling, just prevent clipping
      filters+=("alimiter=limit=0.94")
      ;;
    podcast)
      # Gentle dynamic normalization and broadcast-ish loudness (single-pass)
      # Note: single-pass loudnorm is approximate but OK for quick workflows
      filters+=("dynaudnorm=f=500:g=5:p=0.1")
      filters+=("loudnorm=i=-18:lra=9:tp=-2.0")
      ;;
    aggressive)
      # Heavier clean-up; may harm ASR slightly but suppress background more
      filters+=("agate=threshold=0.012:ratio=2.5:release=200")
      filters+=("dynaudnorm=f=400:g=7:p=0.1")
      filters+=("loudnorm=i=-18:lra=9:tp=-2.0")
      ;;
    *)
      # Unknown preset: no dynamics stage is added.
      ;;
  esac

  # Resample and format at the end for ASR
  filters+=("aresample=16000")
  filters+=("aformat=channel_layouts=mono:sample_fmts=s16")

  local IFS=","
  echo "${filters[*]}"
}

# Compute the output path for input file $1 and print it on stdout.
# Creates OUT_DIR when it is set. Output name: <basename><SUFFIX>.<OUT_EXT>.
make_out_path_for_file() {
  local in_file="$1"
  local base
  base=$(basename -- "$in_file")
  base="${base%.*}"
  local out_base="${base}${SUFFIX}.${OUT_EXT}"
  if [[ -n "$OUT_DIR" ]]; then
    mkdir -p -- "$OUT_DIR"
    echo "$OUT_DIR/$out_base"
  else
    local dir
    dir=$(dirname -- "$in_file")
    echo "$dir/$out_base"
  fi
}

# Clean a single input file ($1) with ffmpeg.
# Skips the file (status 0) when the output exists and --force is not set;
# otherwise returns ffmpeg's exit status.
process_one() {
  local in_file="$1"
  local out_file
  out_file=$(make_out_path_for_file "$in_file")

  # Choose codec based on extension
  local codec=(-c:a pcm_s16le)
  if [[ "$OUT_EXT" == "flac" ]]; then
    codec=(-c:a flac)
  fi

  # Reuse the chain computed once by main(); build it here only if it has not
  # been prepared yet.
  local af="$AUDIO_FILTERS"
  if [[ -z "$af" ]]; then
    af=$(build_filters)
  fi

  if [[ -f "$out_file" && $FORCE == false ]]; then
    echo "Skip (exists): $out_file"
    return 0
  fi

  echo "Cleaning: $in_file -> $out_file"
  # -nostdin: never let ffmpeg read from (or wait on) the terminal, which
  # matters for loops and background jobs.
  "$FFMPEG_BIN" "${FFMPEG_LOG[@]}" -nostdin "${FFMPEG_OVERWRITE[@]}" \
    -i "$in_file" -af "$af" "${codec[@]}" "$out_file"
}

# Concurrency helpers ('wait -n' is available from bash 4.3; fall back to
# sequential processing if it is not).
supports_wait_n=false
if [[ -n "${BASH_VERSINFO:-}" ]]; then
  if ((BASH_VERSINFO[0] > 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3))); then
    supports_wait_n=true
  fi
fi

# Process every audio file found recursively under directory $1.
# Runs up to JOBS files in parallel when JOBS > 1 and 'wait -n' is supported.
# Returns: 0 on success; 1 if any parallel job failed. In sequential mode the
# first failure aborts the script via 'set -e'.
run_dir() {
  local dir="$1"
  local -a files=()
  # Common audio extensions (case-insensitive)
  mapfile -d '' -t files < <(find "$dir" -type f \
    \( -iname "*.wav" -o -iname "*.mp3" -o -iname "*.m4a" -o -iname "*.aac" -o -iname "*.flac" \
    -o -iname "*.ogg" -o -iname "*.opus" -o -iname "*.wma" -o -iname "*.webm" \) -print0)

  if [[ ${#files[@]} -eq 0 ]]; then
    echo "No audio files found in: $dir"
    return 0
  fi

  local f
  if ((JOBS <= 1)) || [[ $supports_wait_n == false ]]; then
    for f in "${files[@]}"; do
      process_one "$f"
    done
    return 0
  fi

  # Counters use var=$((...)) rather than ((var++)): the latter returns
  # status 1 when the old value is 0, which would abort under 'set -e'.
  local running=0 failed=0
  for f in "${files[@]}"; do
    process_one "$f" &
    running=$((running + 1))
    if ((running >= JOBS)); then
      wait -n || failed=$((failed + 1))
      running=$((running - 1))
    fi
  done

  # Reap the remaining jobs one at a time so each exit status is observed
  # (a bare 'wait' always reports success).
  while ((running > 0)); do
    wait -n || failed=$((failed + 1))
    running=$((running - 1))
  done
  wait

  if ((failed > 0)); then
    echo "Error: $failed file(s) failed to process" >&2
    return 1
  fi
}

# Entry point: validate the input path, prepare the filter chain once, then
# process a single file or a whole directory.
main() {
  # Sanity checks and notices
  if [[ -n "$RN_MODEL" && $use_arnndn == false && $NO_ML == false ]]; then
    echo "Note: arnndn filter not available in your ffmpeg or model missing — using afftdn." >&2
  fi

  if [[ ! -f "$INPUT_PATH" && ! -d "$INPUT_PATH" ]]; then
    echo "Error: Input path not found: $INPUT_PATH" >&2
    exit 1
  fi

  # Build the chain once: it depends only on global settings, and a
  # configuration error should stop the run before any job is started.
  AUDIO_FILTERS=$(build_filters) || exit $?

  if [[ -f "$INPUT_PATH" ]]; then
    process_one "$INPUT_PATH"
  else
    run_dir "$INPUT_PATH"
  fi
}

main "$@"