testsAndMisc/scripts/misc/testsAndMisc-bash/clean_audio.sh
Krzysztof Rudnicki 43e2799156 Import testsAndMisc/Bash as subtree (preserve history)
git-subtree-dir: scripts/misc/testsAndMisc-bash
git-subtree-mainline: d19bb5915a
git-subtree-split: 49bcc2ca68
2025-11-01 16:27:05 +01:00

391 lines
11 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# clean_audio.sh — Fully automatic audio cleaner for speech (ASR-friendly)
#
# - Default preset is tuned for ASR (faster-whisper):
# mono, 16 kHz, high-pass filter, denoise (RNNoise arnndn by default if model found/provided; else afftdn),
# peak limiting to -0.5 dBFS. No aggressive gating/compression by default.
# - Optional "podcast" preset adds gentle dynamics and loudness leveling.
# - Accepts single files or directories (recursively).
# - Optional parallel processing.
#
# Dependencies: ffmpeg (arnndn filter recommended for best results)
# Optional: an RNNoise model file for arnndn (auto-discovered if present; otherwise falls back to afftdn)
#
# Usage examples:
# Bash/clean_audio.sh input.wav # -> input_clean.wav (same folder)
# Bash/clean_audio.sh input.wav -O out_dir # -> out_dir/input_clean.wav
# Bash/clean_audio.sh input_dir -O cleaned/ -j 4 # -> processes all audio files in dir
# Bash/clean_audio.sh input.wav -m models/rn.nn # -> use RNNoise model
# Bash/clean_audio.sh input.wav --preset podcast # -> add dynamics leveler
#
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
print_usage() {
cat <<EOF
Usage: $0 <input-file|input-dir> [options]
Options:
-O, --out-dir DIR Output directory (default: alongside input file).
-e, --ext EXT Output extension/container: wav|flac (default: wav).
-m, --model PATH RNNoise model file for arnndn; required by default unless --allow-fallback.
--no-ml Do not use arnndn even if model is provided (requires --allow-fallback).
--preset NAME asr (default) | podcast | aggressive
-j, --jobs N Parallel jobs for directory mode (default: 1).
-f, --force Overwrite outputs if they exist (ffmpeg -y).
-q, --quiet Reduce ffmpeg logging noise.
--lowpass FREQ Optional low-pass cutoff (e.g., 8000). Disabled by default.
--suffix SUF Suffix for output basename (default: _clean).
-h, --help Show this help.
Notes:
- Default sample rate is 16 kHz mono PCM 16-bit (good for most speech ASR models).
- If arnndn (RNNoise) is used, it usually outperforms afftdn for speech denoise.
- The 'podcast' preset adds gentle dynamics and loudness normalization (single-pass).
EOF
}
require_cmd() {
command -v "$1" >/dev/null 2>&1 || {
echo "Error: Required command '$1' not found in PATH" >&2
exit 1
}
}
# Defaults
OUT_DIR=""
OUT_EXT="wav"
RN_MODEL=""
NO_ML=false
REQUIRE_ML=true # default: require RNNoise; install/guide if missing; fail fast if unavailable
PRESET="asr"
JOBS=1
FORCE=false
QUIET=false
LOWPASS=""
SUFFIX="_clean"
HIGHPASS="80"
AFFTDN_NF="-25" # noise floor in dB for afftdn
AFFTDN_MD="8" # mode for afftdn (higher can be more aggressive); requires builds that support 'md'
NO_ADVANCED=false # when true, avoid advanced options that some ffmpeg builds lack
# Parse args
if [[ $# -lt 1 ]]; then
print_usage
exit 1
fi
INPUT_PATH="$1"; shift || true
while [[ $# -gt 0 ]]; do
case "$1" in
-O|--out-dir)
OUT_DIR="$2"; shift 2;;
-e|--ext)
OUT_EXT="$2"; shift 2;;
-m|--model)
RN_MODEL="$2"; shift 2;;
--no-ml)
NO_ML=true; shift;;
--preset)
PRESET="$2"; shift 2;;
-j|--jobs)
JOBS="$2"; shift 2;;
-f|--force)
FORCE=true; shift;;
-q|--quiet)
QUIET=true; shift;;
--lowpass)
LOWPASS="$2"; shift 2;;
--suffix)
SUFFIX="$2"; shift 2;;
--no-advanced|--compat)
NO_ADVANCED=true; shift;;
--allow-fallback)
REQUIRE_ML=false; shift;;
-h|--help)
print_usage; exit 0;;
*)
echo "Unknown option: $1" >&2
print_usage
exit 1;;
esac
done
require_cmd ffmpeg
# Resolve FFmpeg binary (env override -> local build -> system)
FFMPEG_BIN=${FFMPEG_BIN:-}
if [[ -z "${FFMPEG_BIN}" ]]; then
if [[ -x "$SCRIPT_DIR/ffmpeg-build/FFmpeg/ffmpeg" ]]; then
FFMPEG_BIN="$SCRIPT_DIR/ffmpeg-build/FFmpeg/ffmpeg"
else
FFMPEG_BIN="ffmpeg"
fi
fi
if ! command -v "$FFMPEG_BIN" >/dev/null 2>&1 && [[ ! -x "$FFMPEG_BIN" ]]; then
echo "Error: FFmpeg binary not found: $FFMPEG_BIN" >&2
exit 1
fi
if ! $QUIET; then
echo "Using FFmpeg binary: $FFMPEG_BIN" >&2
fi
FFMPEG_LOG=(-hide_banner)
if $QUIET; then
FFMPEG_LOG+=( -loglevel error )
else
FFMPEG_LOG+=( -loglevel info )
fi
FFMPEG_OVERWRITE=(-n)
if $FORCE; then
FFMPEG_OVERWRITE=(-y)
fi
arnndn_available=false
if "$FFMPEG_BIN" -hide_banner -h filter=arnndn >/dev/null 2>&1; then
arnndn_available=true
else
if "$FFMPEG_BIN" -hide_banner -filters 2>/dev/null | grep -Eq '(^|[[:space:]])arnndn([[:space:]]|$)'; then
arnndn_available=true
fi
fi
if ! $QUIET; then
echo "arnndn_available=$arnndn_available" >&2
fi
# Check if afftdn supports 'md' option
afftdn_supports_md=false
if "$FFMPEG_BIN" -hide_banner -h filter=afftdn 2>/dev/null | grep -q " md="; then
afftdn_supports_md=true
fi
# Try to auto-discover an RNNoise model if none provided
find_default_rn_model() {
local candidate=""
# Allow env variable override
if [[ -n "${RNNOISE_MODEL:-}" && -f "${RNNOISE_MODEL}" ]]; then
echo "${RNNOISE_MODEL}"
return 0
fi
local dirs=(
"$SCRIPT_DIR/models"
"$SCRIPT_DIR/../models"
"/usr/share/rnnoise"
"/usr/local/share/rnnoise"
"/usr/share/ffmpeg/models"
"$HOME/.local/share/rnnoise"
)
# Prefer '.rnnn' models (rnnoise-nu style) over legacy '.nn'
local exts=("rnnn" "nn" "model")
for d in "${dirs[@]}"; do
if [[ -d "$d" ]]; then
for ext in "${exts[@]}"; do
# Pick the first matching model file
for f in "$d"/*."$ext"; do
if [[ -f "$f" ]]; then
echo "$f"
return 0
fi
done
done
fi
done
return 1
}
use_arnndn=false
if [[ $NO_ML == false ]]; then
if [[ $arnndn_available == false ]]; then
if $REQUIRE_ML; then
echo "Error: FFmpeg 'arnndn' filter not available. Please install/upgrade FFmpeg with librnnoise (see Bash/install_ffmpeg_with_arnndn.sh)." >&2
exit 9
fi
else
# arnndn available; require an external model
if [[ -n "$RN_MODEL" && -f "$RN_MODEL" ]]; then
:
else
if model_path=$(find_default_rn_model); then
RN_MODEL="$model_path"
else
if [[ -x "$SCRIPT_DIR/get_rnnoise_model.sh" ]]; then
RN_TARGET_DIR="$SCRIPT_DIR/models" RN_TARGET_NAME="rnnoise_model.rnnn" "$SCRIPT_DIR/get_rnnoise_model.sh" --yes || true
if model_path=$(find_default_rn_model); then
RN_MODEL="$model_path"
fi
fi
fi
fi
if [[ -z "$RN_MODEL" ]]; then
echo "Error: RNNoise model required but not found. Automatic download failed." >&2
echo "Hint: Set RN_URL to a reachable model URL and run Bash/get_rnnoise_model.sh, or supply -m /path/to/model.nn." >&2
exit 10
fi
use_arnndn=true
echo "Using RNNoise external model: $RN_MODEL" >&2
fi
fi
build_filters() {
local filters=()
# Remove low-frequency rumble typical for handheld/room noise
filters+=("highpass=f=${HIGHPASS}")
# Denoise
if $use_arnndn; then
# arnndn with full mix keeps the model output; if no external model, rely on built-in
filters+=("aresample=48000")
filters+=("arnndn=m=${RN_MODEL}:mix=1.0")
else
# afftdn: FFT-based denoise, tune nf (noise floor) as needed
if $REQUIRE_ML; then
echo "Error: RNNoise required but not in use; aborting rather than falling back to afftdn. Use --allow-fallback to permit." >&2
exit 11
fi
if $NO_ADVANCED; then
filters+=("afftdn=nf=${AFFTDN_NF}")
else
if $afftdn_supports_md; then
filters+=("afftdn=nf=${AFFTDN_NF}:md=${AFFTDN_MD}")
else
echo "Error: Your ffmpeg's afftdn filter does not support 'md='." >&2
echo "Hint: Install/upgrade ffmpeg to a build that supports afftdn md or rerun with --no-advanced." >&2
echo " On Debian/Ubuntu you may need a newer ffmpeg from a PPA or build from source." >&2
exit 8
fi
fi
fi
# Optional low-pass to shave hiss; keep disabled unless requested
if [[ -n "$LOWPASS" ]]; then
filters+=("lowpass=f=${LOWPASS}")
fi
case "$PRESET" in
asr)
# ASR-friendly: avoid heavy gating/leveling, just prevent clipping
filters+=("alimiter=limit=0.94")
;;
podcast)
# Gentle dynamic normalization and broadcast-ish loudness (single-pass)
# Note: single-pass loudnorm is approximate but OK for quick workflows
filters+=("dynaudnorm=f=500:g=5:p=0.1")
filters+=("loudnorm=i=-18:lra=9:tp=-2.0")
;;
aggressive)
# Heavier clean-up; may harm ASR slightly but suppress background more
filters+=("agate=threshold=0.012:ratio=2.5:release=200")
filters+=("dynaudnorm=f=400:g=7:p=0.1")
filters+=("loudnorm=i=-18:lra=9:tp=-2.0")
;;
*) ;;
esac
# Resample and format at the end for ASR
filters+=("aresample=16000")
filters+=("aformat=channel_layouts=mono:sample_fmts=s16")
local IFS=","; echo "${filters[*]}"
}
make_out_path_for_file() {
local in_file="$1"
local base
base=$(basename -- "$in_file")
base="${base%.*}"
local out_base="${base}${SUFFIX}.${OUT_EXT}"
if [[ -n "$OUT_DIR" ]]; then
mkdir -p -- "$OUT_DIR"
echo "$OUT_DIR/$out_base"
else
local dir
dir=$(dirname -- "$in_file")
echo "$dir/$out_base"
fi
}
process_one() {
local in_file="$1"
local out_file
out_file=$(make_out_path_for_file "$in_file")
# Choose codec based on extension
local codec=( -c:a pcm_s16le )
if [[ "$OUT_EXT" == "flac" ]]; then
codec=( -c:a flac )
fi
local af
af=$(build_filters)
if [[ -f "$out_file" && $FORCE == false ]]; then
echo "Skip (exists): $out_file"
return 0
fi
echo "Cleaning: $in_file -> $out_file"
"$FFMPEG_BIN" "${FFMPEG_LOG[@]}" "${FFMPEG_OVERWRITE[@]}" -i "$in_file" -af "$af" "${codec[@]}" "$out_file"
}
# Concurrency helpers (bash >= 5 supports wait -n; fallback to sequential if not)
supports_wait_n=false
if [[ -n "${BASH_VERSINFO:-}" && ${BASH_VERSINFO[0]} -ge 5 ]]; then
supports_wait_n=true
fi
run_dir() {
local dir="$1"
# Common audio extensions (case-insensitive)
mapfile -d '' files < <(find "$dir" -type f \
\( -iname "*.wav" -o -iname "*.mp3" -o -iname "*.m4a" -o -iname "*.aac" -o -iname "*.flac" \
-o -iname "*.ogg" -o -iname "*.opus" -o -iname "*.wma" -o -iname "*.webm" \) -print0)
if [[ ${#files[@]} -eq 0 ]]; then
echo "No audio files found in: $dir"
return 0
fi
local running=0
for f in "${files[@]}"; do
if [[ "$JOBS" -le 1 || $supports_wait_n == false ]]; then
process_one "$f"
else
process_one "$f" &
((running++))
if (( running >= JOBS )); then
wait -n || true
((running--))
fi
fi
done
# Wait for any remaining background jobs
if (( JOBS > 1 )) && $supports_wait_n; then
wait || true
fi
}
main() {
# Sanity checks and notices
if [[ -n "$RN_MODEL" && $use_arnndn == false && $NO_ML == false ]]; then
echo "Note: arnndn filter not available in your ffmpeg or model missing — using afftdn." >&2
fi
if [[ -f "$INPUT_PATH" ]]; then
process_one "$INPUT_PATH"
elif [[ -d "$INPUT_PATH" ]]; then
run_dir "$INPUT_PATH"
else
echo "Error: Input path not found: $INPUT_PATH" >&2
exit 1
fi
}
main "$@"