mirror of
https://github.com/kuhyx/scripts.git
synced 2026-07-04 14:43:08 +02:00
391 lines
11 KiB
Bash
Executable File
391 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash

set -euo pipefail

# clean_audio.sh — Fully automatic audio cleaner for speech (ASR-friendly)
#
# - Default preset is tuned for ASR (faster-whisper):
#     mono, 16 kHz, high-pass filter, RNNoise (arnndn) denoise,
#     peak limiting to -0.5 dBFS. No aggressive gating/compression by default.
# - RNNoise is required by default; the FFT-based afftdn denoiser is only used
#   when --allow-fallback is given.
# - Optional "podcast" preset adds gentle dynamics and loudness leveling;
#   "aggressive" additionally applies a noise gate.
# - Accepts single files or directories (recursively).
# - Optional parallel processing (directory mode).
#
# Dependencies: ffmpeg (arnndn filter recommended for best results)
# Optional: an RNNoise model file for arnndn (auto-discovered if present)
#
# Usage examples:
#   Bash/clean_audio.sh input.wav                    # -> input_clean.wav (same folder)
#   Bash/clean_audio.sh input.wav -O out_dir         # -> out_dir/input_clean.wav
#   Bash/clean_audio.sh input_dir -O cleaned/ -j 4   # -> processes all audio files in dir
#   Bash/clean_audio.sh input.wav -m models/rn.nn    # -> use RNNoise model
#   Bash/clean_audio.sh input.wav --preset podcast   # -> add dynamics leveler
#

# Absolute directory of this script; used to locate bundled models, a local
# ffmpeg build and helper scripts.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
|
|
|
#######################################
# Print the command-line help text.
# Globals:   none ($0 is expanded into the usage line)
# Arguments: none
# Outputs:   help text on stdout
#######################################
print_usage() {
  cat <<EOF
Usage: $0 <input-file|input-dir> [options]

Options:
  -O, --out-dir DIR        Output directory (default: alongside input file).
  -e, --ext EXT            Output extension/container: wav|flac (default: wav).
  -m, --model PATH         RNNoise model file for arnndn; required by default unless --allow-fallback.
      --no-ml              Do not use arnndn even if model is provided (requires --allow-fallback).
      --allow-fallback     Permit the afftdn denoiser when RNNoise (arnndn) is not used.
      --no-advanced, --compat
                           Use afftdn without the 'md' option (for ffmpeg builds lacking it).
      --preset NAME        asr (default) | podcast | aggressive
  -j, --jobs N             Parallel jobs for directory mode (default: 1).
  -f, --force              Overwrite outputs if they exist (ffmpeg -y).
  -q, --quiet              Reduce ffmpeg logging noise.
      --lowpass FREQ       Optional low-pass cutoff (e.g., 8000). Disabled by default.
      --suffix SUF         Suffix for output basename (default: _clean).
  -h, --help               Show this help.

Notes:
  - Default sample rate is 16 kHz mono PCM 16-bit (good for most speech ASR models).
  - If arnndn (RNNoise) is used, it usually outperforms afftdn for speech denoise.
  - The 'podcast' preset adds gentle dynamics and loudness normalization (single-pass).
EOF
}
|
|
|
|
#######################################
# Abort the script unless a command is resolvable by the shell.
# Arguments: $1 - command name to look up
# Outputs:   error message on stderr when the command is missing
# Returns:   0 when found; otherwise exits the shell with status 1
#######################################
require_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Error: Required command '$1' not found in PATH" >&2
    exit 1
  fi
}
|
|
|
|
# Defaults (most are overridable from the command line)
OUT_DIR=""          # -O: output directory; empty means next to each input file
OUT_EXT="wav"       # -e: output extension/container
RN_MODEL=""         # -m: RNNoise model path; empty triggers auto-discovery
NO_ML=false         # --no-ml: skip arnndn entirely
REQUIRE_ML=true     # default: require RNNoise; fail fast if unavailable (--allow-fallback clears it)
PRESET="asr"        # --preset: post-denoise processing profile
JOBS=1              # -j: parallel jobs in directory mode
FORCE=false         # -f: overwrite existing outputs
QUIET=false         # -q: reduce logging
LOWPASS=""          # --lowpass: cutoff in Hz; empty disables the low-pass stage
SUFFIX="_clean"     # --suffix: appended to the output basename
HIGHPASS="80"       # high-pass cutoff in Hz (no command-line option)
AFFTDN_NF="-25"     # noise floor in dB for afftdn
AFFTDN_MD="8"       # mode for afftdn (higher can be more aggressive); requires builds that support 'md'
NO_ADVANCED=false   # when true, avoid advanced options that some ffmpeg builds lack
|
|
|
|
# Parse args
if [[ $# -lt 1 ]]; then
  print_usage
  exit 1
fi

# Help must win even when it is given in place of the input path; otherwise
# "-h" would be taken as INPUT_PATH and the help text would never be shown.
case "$1" in
  -h|--help)
    print_usage
    exit 0
    ;;
esac

# The first positional argument is always the input file or directory.
INPUT_PATH="$1"
shift

while [[ $# -gt 0 ]]; do
  # Options that take a value: fail with a clear message instead of an
  # "unbound variable" error (set -u) when the value is missing.
  case "$1" in
    -O|--out-dir|-e|--ext|-m|--model|--preset|-j|--jobs|--lowpass|--suffix)
      if [[ $# -lt 2 ]]; then
        echo "Error: Option '$1' requires a value" >&2
        exit 1
      fi
      ;;
  esac

  case "$1" in
    -O|--out-dir)
      OUT_DIR="$2"
      shift 2
      ;;
    -e|--ext)
      OUT_EXT="$2"
      shift 2
      ;;
    -m|--model)
      RN_MODEL="$2"
      shift 2
      ;;
    --no-ml)
      NO_ML=true
      shift
      ;;
    --preset)
      PRESET="$2"
      shift 2
      ;;
    -j|--jobs)
      JOBS="$2"
      shift 2
      ;;
    -f|--force)
      FORCE=true
      shift
      ;;
    -q|--quiet)
      QUIET=true
      shift
      ;;
    --lowpass)
      LOWPASS="$2"
      shift 2
      ;;
    --suffix)
      SUFFIX="$2"
      shift 2
      ;;
    --no-advanced|--compat)
      NO_ADVANCED=true
      shift
      ;;
    --allow-fallback)
      REQUIRE_ML=false
      shift
      ;;
    -h|--help)
      print_usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      print_usage
      exit 1
      ;;
  esac
done

# JOBS is later evaluated in arithmetic context, so only accept plain digits.
if [[ ! "$JOBS" =~ ^[0-9]+$ ]]; then
  echo "Error: --jobs expects a non-negative integer, got '$JOBS'" >&2
  exit 1
fi
# Force base 10 so values such as "08" are not parsed as invalid octal.
JOBS=$((10#$JOBS))

# An unrecognised preset adds no limiter/dynamics stage; say so instead of
# silently ignoring a typo.
case "$PRESET" in
  asr|podcast|aggressive) ;;
  *)
    echo "Warning: Unknown preset '$PRESET'; no limiter/dynamics stage will be applied." >&2
    ;;
esac
|
|
|
|
# Resolve FFmpeg binary (env override -> local build -> system)
FFMPEG_BIN=${FFMPEG_BIN:-}
if [[ -z "$FFMPEG_BIN" ]]; then
  if [[ -x "$SCRIPT_DIR/ffmpeg-build/FFmpeg/ffmpeg" ]]; then
    FFMPEG_BIN="$SCRIPT_DIR/ffmpeg-build/FFmpeg/ffmpeg"
  else
    # Only the system fallback needs 'ffmpeg' on PATH; an explicit FFMPEG_BIN
    # or a local build must keep working on machines without a system ffmpeg.
    require_cmd ffmpeg
    FFMPEG_BIN="ffmpeg"
  fi
fi

# Accept either a command resolvable by the shell or an executable path.
if ! command -v "$FFMPEG_BIN" >/dev/null 2>&1 && [[ ! -x "$FFMPEG_BIN" ]]; then
  echo "Error: FFmpeg binary not found: $FFMPEG_BIN" >&2
  exit 1
fi
if ! $QUIET; then
  echo "Using FFmpeg binary: $FFMPEG_BIN" >&2
fi

# Logging options passed to every ffmpeg invocation.
FFMPEG_LOG=(-hide_banner)
if $QUIET; then
  FFMPEG_LOG+=(-loglevel error)
else
  FFMPEG_LOG+=(-loglevel info)
fi

# -n: never overwrite existing outputs; -y: overwrite (with --force).
FFMPEG_OVERWRITE=(-n)
if $FORCE; then
  FFMPEG_OVERWRITE=(-y)
fi

# Detect the arnndn filter by matching ffmpeg's output rather than its exit
# status: "-h filter=NAME" does not reliably signal an unknown filter through
# the exit code. Output is captured first so that grep -q closing the pipe
# early cannot turn into a pipefail false negative.
arnndn_available=false
arnndn_help=$("$FFMPEG_BIN" -hide_banner -h filter=arnndn 2>/dev/null || true)
if grep -Eq '^Filter arnndn([[:space:]]|$)' <<<"$arnndn_help"; then
  arnndn_available=true
else
  filter_list=$("$FFMPEG_BIN" -hide_banner -filters 2>/dev/null || true)
  if grep -Eq '(^|[[:space:]])arnndn([[:space:]]|$)' <<<"$filter_list"; then
    arnndn_available=true
  fi
fi
if ! $QUIET; then
  echo "arnndn_available=$arnndn_available" >&2
fi

# Check if afftdn supports 'md' option
# NOTE(review): this looks for the literal text " md=" in the afftdn help
# output — confirm that an ffmpeg build actually prints it in that form.
afftdn_supports_md=false
afftdn_help=$("$FFMPEG_BIN" -hide_banner -h filter=afftdn 2>/dev/null || true)
if grep -q " md=" <<<"$afftdn_help"; then
  afftdn_supports_md=true
fi
|
|
|
|
#######################################
# Try to auto-discover an RNNoise model if none provided.
# Search order: $RNNOISE_MODEL (if it names an existing file), then each
# candidate directory in turn; within a directory '.rnnn' is preferred over
# '.nn', then '.model'.
# Globals:   RNNOISE_MODEL, SCRIPT_DIR, HOME (all read-only)
# Arguments: none
# Outputs:   path of the first model found, on stdout
# Returns:   0 when a model was found, 1 otherwise
#######################################
find_default_rn_model() {
  local dir ext model

  # Allow env variable override
  if [[ -n "${RNNOISE_MODEL:-}" && -f "${RNNOISE_MODEL}" ]]; then
    printf '%s\n' "$RNNOISE_MODEL"
    return 0
  fi

  local -a search_dirs=(
    "$SCRIPT_DIR/models"
    "$SCRIPT_DIR/../models"
    "/usr/share/rnnoise"
    "/usr/local/share/rnnoise"
    "/usr/share/ffmpeg/models"
  )
  # HOME may be unset (cron, minimal containers); skip it instead of tripping set -u.
  if [[ -n "${HOME:-}" ]]; then
    search_dirs+=("$HOME/.local/share/rnnoise")
  fi

  # Prefer '.rnnn' models (rnnoise-nu style) over legacy '.nn'
  local -a exts=("rnnn" "nn" "model")

  for dir in "${search_dirs[@]}"; do
    [[ -d "$dir" ]] || continue
    for ext in "${exts[@]}"; do
      # An unmatched glob stays literal; the -f test filters that case out.
      for model in "$dir"/*."$ext"; do
        if [[ -f "$model" ]]; then
          printf '%s\n' "$model"
          return 0
        fi
      done
    done
  done

  return 1
}
|
|
|
|
# Decide whether the RNNoise (arnndn) denoiser will be used.
#   use_arnndn=true  -> arnndn with the model in RN_MODEL
#   use_arnndn=false -> afftdn (only permitted with --allow-fallback)
use_arnndn=false

if [[ $NO_ML == true ]]; then
  # --no-ml without --allow-fallback can never succeed; fail once, up front,
  # instead of once per file inside the filter builder.
  if $REQUIRE_ML; then
    echo "Error: RNNoise required but not in use; aborting rather than falling back to afftdn. Use --allow-fallback to permit." >&2
    exit 11
  fi
elif [[ $arnndn_available == false ]]; then
  if $REQUIRE_ML; then
    echo "Error: FFmpeg 'arnndn' filter not available. Please install/upgrade FFmpeg with librnnoise (see Bash/install_ffmpeg_with_arnndn.sh)." >&2
    exit 9
  fi
else
  # arnndn available; require an external model
  if [[ -n "$RN_MODEL" && ! -f "$RN_MODEL" ]]; then
    # Do not carry a non-existent path forward: it would otherwise survive a
    # failed discovery and be handed to ffmpeg.
    echo "Warning: RNNoise model not found: $RN_MODEL; trying auto-discovery." >&2
    RN_MODEL=""
  fi

  if [[ -z "$RN_MODEL" ]]; then
    if model_path=$(find_default_rn_model); then
      RN_MODEL="$model_path"
    elif [[ -x "$SCRIPT_DIR/get_rnnoise_model.sh" ]]; then
      # Best effort: run the helper script, then look again. A failure of the
      # helper is tolerated here and reported below if no model turns up.
      RN_TARGET_DIR="$SCRIPT_DIR/models" RN_TARGET_NAME="rnnoise_model.rnnn" "$SCRIPT_DIR/get_rnnoise_model.sh" --yes || true
      if model_path=$(find_default_rn_model); then
        RN_MODEL="$model_path"
      fi
    fi
  fi

  if [[ -n "$RN_MODEL" ]]; then
    use_arnndn=true
    echo "Using RNNoise external model: $RN_MODEL" >&2
  elif $REQUIRE_ML; then
    echo "Error: RNNoise model required but not found. Automatic download failed." >&2
    echo "Hint: Set RN_URL to a reachable model URL and run Bash/get_rnnoise_model.sh, or supply -m /path/to/model.nn." >&2
    exit 10
  else
    # --allow-fallback was given, so a missing model is not fatal.
    echo "Note: RNNoise model not found — falling back to afftdn." >&2
  fi
fi
|
|
|
|
#######################################
# Assemble the ffmpeg -af filter chain from the current settings.
# Chain order: high-pass -> denoise (arnndn or afftdn) -> optional low-pass
# -> preset stage -> resample to 16 kHz -> mono s16.
# Globals (read): HIGHPASS, use_arnndn, RN_MODEL, REQUIRE_ML, NO_ADVANCED,
#                 afftdn_supports_md, AFFTDN_NF, AFFTDN_MD, LOWPASS, PRESET
# Arguments: none
# Outputs:   comma-joined filter chain on stdout; errors on stderr
# Returns:   0 on success; exits 11 when RNNoise is required but not in use,
#            exits 8 when afftdn 'md' is needed but unsupported. Callers use
#            command substitution, so the exit ends that subshell only.
#######################################
build_filters() {
  local -a filters=()
  local model_arg ch
  local bs='\'

  # Remove low-frequency rumble typical for handheld/room noise
  filters+=("highpass=f=${HIGHPASS}")

  # Denoise
  if $use_arnndn; then
    # The model path goes through two ffmpeg parsers. Escape it for the
    # filter-option level first (\ : '), then for the filtergraph level
    # (\ ' , ; [ ]), so that paths containing those characters stay intact.
    model_arg=$RN_MODEL
    for ch in "$bs" ":" "'"; do
      model_arg=${model_arg//"$ch"/"$bs$ch"}
    done
    for ch in "$bs" "'" "," ";" "[" "]"; do
      model_arg=${model_arg//"$ch"/"$bs$ch"}
    done
    # Resample to 48 kHz before arnndn (presumably the rate RNNoise models
    # expect — TODO confirm); mix=1.0 keeps only the denoised signal.
    filters+=("aresample=48000")
    filters+=("arnndn=m=${model_arg}:mix=1.0")
  else
    # afftdn: FFT-based denoise, tune nf (noise floor) as needed
    if $REQUIRE_ML; then
      echo "Error: RNNoise required but not in use; aborting rather than falling back to afftdn. Use --allow-fallback to permit." >&2
      exit 11
    fi
    if $NO_ADVANCED; then
      filters+=("afftdn=nf=${AFFTDN_NF}")
    elif $afftdn_supports_md; then
      filters+=("afftdn=nf=${AFFTDN_NF}:md=${AFFTDN_MD}")
    else
      echo "Error: Your ffmpeg's afftdn filter does not support 'md='." >&2
      echo "Hint: Install/upgrade ffmpeg to a build that supports afftdn md or rerun with --no-advanced." >&2
      echo " On Debian/Ubuntu you may need a newer ffmpeg from a PPA or build from source." >&2
      exit 8
    fi
  fi

  # Optional low-pass to shave hiss; keep disabled unless requested
  if [[ -n "$LOWPASS" ]]; then
    filters+=("lowpass=f=${LOWPASS}")
  fi

  case "$PRESET" in
    asr)
      # ASR-friendly: avoid heavy gating/leveling, just prevent clipping
      filters+=("alimiter=limit=0.94")
      ;;
    podcast)
      # Gentle dynamic normalization and broadcast-ish loudness (single-pass)
      # Note: single-pass loudnorm is approximate but OK for quick workflows
      filters+=("dynaudnorm=f=500:g=5:p=0.1")
      filters+=("loudnorm=i=-18:lra=9:tp=-2.0")
      ;;
    aggressive)
      # Heavier clean-up; may harm ASR slightly but suppress background more
      filters+=("agate=threshold=0.012:ratio=2.5:release=200")
      filters+=("dynaudnorm=f=400:g=7:p=0.1")
      filters+=("loudnorm=i=-18:lra=9:tp=-2.0")
      ;;
    *)
      # Unknown preset: no limiter/dynamics stage is added.
      ;;
  esac

  # Resample and format at the end for ASR
  filters+=("aresample=16000")
  filters+=("aformat=channel_layouts=mono:sample_fmts=s16")

  # "${filters[*]}" joins on the first character of IFS, i.e. a comma here.
  local IFS=","
  printf '%s\n' "${filters[*]}"
}
|
|
|
|
#######################################
# Compute the output path for one input file.
# The output name is the input basename without its last extension, followed
# by SUFFIX and "." OUT_EXT. It is placed in OUT_DIR when set (the directory
# is created on demand), otherwise next to the input file.
# Globals (read): SUFFIX, OUT_EXT, OUT_DIR
# Arguments: $1 - input file path
# Outputs:   output file path on stdout
#######################################
make_out_path_for_file() {
  local in_file="$1"
  local stem target_dir

  stem=$(basename -- "$in_file")
  stem="${stem%.*}"
  local out_name="${stem}${SUFFIX}.${OUT_EXT}"

  if [[ -n "$OUT_DIR" ]]; then
    mkdir -p -- "$OUT_DIR"
    # Drop one trailing slash so "cleaned/" does not produce "cleaned//name".
    target_dir="${OUT_DIR%/}"
  else
    target_dir=$(dirname -- "$in_file")
  fi

  printf '%s\n' "${target_dir}/${out_name}"
}
|
|
|
|
#######################################
# Clean a single audio file with ffmpeg.
# Globals (read): OUT_EXT, FORCE, FFMPEG_BIN, FFMPEG_LOG, FFMPEG_OVERWRITE
# Arguments: $1 - input file path
# Outputs:   progress lines on stdout
# Returns:   0 when the file was written or skipped; otherwise the failing
#            status of the filter builder or of ffmpeg
#######################################
process_one() {
  local in_file="$1"
  local out_file af
  # Choose codec based on extension
  local -a codec=(-c:a pcm_s16le)

  out_file=$(make_out_path_for_file "$in_file")

  # Skip before doing any other work when the output is already there.
  if [[ -f "$out_file" && $FORCE == false ]]; then
    echo "Skip (exists): $out_file"
    return 0
  fi

  if [[ "$OUT_EXT" == "flac" ]]; then
    codec=(-c:a flac)
  fi

  # Declared above and assigned here so a failure is not masked by 'local'.
  af=$(build_filters)

  echo "Cleaning: $in_file -> $out_file"
  # -nostdin: this may run as a background job, so ffmpeg must not try to
  # read interactive commands from the terminal.
  "$FFMPEG_BIN" -nostdin "${FFMPEG_LOG[@]}" "${FFMPEG_OVERWRITE[@]}" \
    -i "$in_file" -af "$af" "${codec[@]}" "$out_file"
}
|
|
|
|
# Concurrency helpers: parallel mode relies on 'wait -n', which bash provides
# since version 4.3; older shells fall back to sequential processing.
supports_wait_n=false
if [[ -n "${BASH_VERSINFO:-}" ]]; then
  if (( BASH_VERSINFO[0] > 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3) )); then
    supports_wait_n=true
  fi
fi
|
|
|
|
#######################################
# Clean every audio file found (recursively) under a directory.
# Files are processed sequentially, or with up to JOBS concurrent background
# jobs when JOBS > 1 and the shell supports 'wait -n'.
# Globals (read): JOBS, supports_wait_n
# Arguments: $1 - directory to scan
# Outputs:   a notice on stdout when no audio files are found
# Returns:   0; in parallel mode failures of individual jobs are tolerated
#            (best effort), in sequential mode a failure propagates
#######################################
run_dir() {
  local dir="$1"
  local -a files=()
  local f
  local running=0

  # Common audio extensions (case-insensitive); NUL-delimited so any file
  # name survives intact.
  mapfile -d '' -t files < <(find "$dir" -type f \
    \( -iname "*.wav" -o -iname "*.mp3" -o -iname "*.m4a" -o -iname "*.aac" -o -iname "*.flac" \
    -o -iname "*.ogg" -o -iname "*.opus" -o -iname "*.wma" -o -iname "*.webm" \) -print0)

  if [[ ${#files[@]} -eq 0 ]]; then
    echo "No audio files found in: $dir"
    return 0
  fi

  for f in "${files[@]}"; do
    if [[ "$JOBS" -le 1 || $supports_wait_n == false ]]; then
      process_one "$f"
    else
      process_one "$f" &
      # Plain assignment on purpose: '((running++))' evaluates to 0 for the
      # first job, which is exit status 1 and aborts the script under set -e.
      running=$((running + 1))
      if (( running >= JOBS )); then
        # Free one slot before starting the next job.
        wait -n || true
        running=$((running - 1))
      fi
    fi
  done

  # Wait for any remaining background jobs
  if (( JOBS > 1 )) && $supports_wait_n; then
    wait || true
  fi
}
|
|
|
|
#######################################
# Entry point: dispatch on whether INPUT_PATH is a file or a directory.
# Globals (read): RN_MODEL, use_arnndn, NO_ML, INPUT_PATH
# Arguments: none used (settings come from the globals above)
# Outputs:   notices and errors on stderr
# Returns:   status of the processing step; exits 1 when INPUT_PATH is
#            neither a regular file nor a directory
#######################################
main() {
  # Sanity checks and notices: a model is set but arnndn will not be used.
  if [[ -n "$RN_MODEL" && $use_arnndn == false && $NO_ML == false ]]; then
    echo "Note: arnndn filter not available in your ffmpeg or model missing — using afftdn." >&2
  fi

  if [[ -f "$INPUT_PATH" ]]; then
    process_one "$INPUT_PATH"
  elif [[ -d "$INPUT_PATH" ]]; then
    run_dir "$INPUT_PATH"
  else
    echo "Error: Input path not found: $INPUT_PATH" >&2
    exit 1
  fi
}
|
|
|
|
# Run the entry point, forwarding the script's remaining positional arguments.
main "$@"
|