testsAndMisc/linux_configuration/scripts/misc/testsAndMisc-bash/transcribe.sh

#!/usr/bin/env bash
set -euo pipefail

# Transcribe an audio file using faster-whisper with automatic setup.
# - Creates Python venv in .venv
# - Installs ffmpeg and espeak-ng (best-effort) for test audio generation
# - Installs faster-whisper (and CUDA stack if NVIDIA is present)
# - Runs tools/transcribe_fw.py to produce .txt and .srt next to the input

# All paths are derived from the directory that contains this script, so the
# script behaves the same no matter what the caller's working directory is.
SCRIPT_DIR=$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)
PROJECT_DIR=${SCRIPT_DIR}
# Python helpers live in <project>/tools; the virtualenv in <project>/.venv.
TOOLS_DIR=${PROJECT_DIR}/tools
PY_RUNNER=${TOOLS_DIR}/transcribe_fw.py
PY_HELPERS=${TOOLS_DIR}/transcribe_helpers.py
VENV_DIR=${PROJECT_DIR}/.venv

# Print the command-line help text on stdout.
# Each option row is indented with one tab and its option column is padded
# to 22 characters so the descriptions line up.
usage() {
  local prog
  prog="$(basename "$0")"

  printf 'Usage: %s [--online] [--prepare-model NAME --model-dir DIR] [-m model] [-l lang] [-o outdir] [audio_file]\n' "$prog"
  printf '\n'
  printf 'Options:\n'

  # Flat list of (option, description) pairs.
  local -a rows=(
    "--online" "Allow network to install deps and/or download models (default: offline)"
    "--prepare-model NAME" "Download a model for offline use (implies --online)"
    "--model-dir DIR" "Directory to store or load local models (default: ./models)"
    "-m model" "Model size or path (tiny, base, small, medium, large-v3, etc.). Default: large-v3"
    "-l lang" "Language code (e.g., en). Default: auto-detect"
    "-o outdir" "Output directory (default: alongside input)"
    "[env] FW_DIARIZE=1" "Enable diarization (speaker labels). Optional: FW_NUM_SPEAKERS=N. When --online, installs soundfile, speechbrain, and CPU-only torch/torchaudio."
    "-h" "Show help"
  )
  local i
  for ((i = 0; i < ${#rows[@]}; i += 2)); do
    printf '\t%-22s%s\n' "${rows[i]}" "${rows[i + 1]}"
  done
}

# Write a timestamped message to stdout: "[HH:MM:SS] arg1 arg2 ...".
# Arguments are joined with single spaces.
log() {
  local -a words=("[$(date +'%H:%M:%S')]" "$@")
  # A space-only IFS makes "${words[*]}" join the words with one space each.
  local IFS=' '
  printf '%s\n' "${words[*]}"
}

# Print the name of the first supported package manager found on PATH.
# Probe order: apt-get, dnf, yum, pacman, zypper. apt-get is reported as
# "apt"; "none" is printed when no candidate is available.
detect_pkg_mgr() {
  local probe
  for probe in apt-get dnf yum pacman zypper; do
    if command -v "$probe" > /dev/null 2>&1; then
      # Strip the "-get" suffix so apt-get is reported as "apt".
      echo "${probe%-get}"
      return
    fi
  done
  echo none
}

# Check whether the CUDA 12 cuBLAS runtime (libcublas.so.12) is available.
# Globals:  VENV_DIR (read), PY_HELPERS (read)
# Returns:  0 if the library is found in a known system directory or inside
#           the venv's pip-installed nvidia packages, 1 otherwise.
has_libcublas12() {
  # Keep the loop variable local so callers' globals are not clobbered.
  local d
  # Common system locations. The multiarch directories are where Debian and
  # Ubuntu packages install the library. An unmatched cuda-12* glob stays
  # literal and simply fails the -e test.
  for d in \
    /usr/lib \
    /usr/lib64 \
    /usr/lib/x86_64-linux-gnu \
    /usr/lib/aarch64-linux-gnu \
    /usr/local/cuda/lib64 \
    /usr/local/cuda-12*/lib64 \
    /opt/cuda/lib64 \
    /opt/cuda/targets/x86_64-linux/lib; do
    if [[ -e "$d/libcublas.so.12" ]]; then
      return 0
    fi
  done
  # venv-provided NVIDIA CUDA libs
  if [[ -x "$VENV_DIR/bin/python" ]]; then
    local pyver
    # The helper prints the interpreter version used in the site-packages
    # path; a failing helper just leaves pyver empty.
    pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2> /dev/null || true)"
    if [[ -n $pyver ]]; then
      for d in "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib" \
        "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib" \
        "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cuda_runtime/lib"; do
        if [[ -e "$d/libcublas.so.12" ]]; then
          return 0
        fi
      done
    fi
  fi
  return 1
}

# Make sure the CUDA 12 cuBLAS runtime is present, installing it if allowed.
# Globals:  OFFLINE (read)
# Offline:  succeeds only if the library already exists, otherwise exits 6.
# Online:   when the library is missing and sudo exists, attempts a
#           best-effort install via the detected package manager, then
#           re-checks and exits 6 if the library is still missing.
ensure_cuda_runtime() {
  local pkg_mgr
  pkg_mgr="$(detect_pkg_mgr)"

  if [[ $OFFLINE -eq 1 ]]; then
    has_libcublas12 && return 0
    echo "CUDA runtime (libcublas.so.12) not found and offline mode is enabled. Install CUDA 12 runtime or rerun with --online." >&2
    exit 6
  fi

  # Online: nothing to do when the library is already there.
  has_libcublas12 && return 0

  if command -v sudo > /dev/null 2>&1; then
    log "CUDA cuBLAS 12 not found; attempting to install CUDA runtime (manager: $pkg_mgr)"
    # Install failures are tolerated here; the re-check below decides.
    set +e
    case "$pkg_mgr" in
      apt)
        sudo apt-get update -y || true
        sudo apt-get install -y nvidia-cuda-toolkit || true
        ;;
      pacman)
        sudo pacman -Sy --noconfirm cuda cudnn || true
        ;;
      dnf | yum | zypper)
        # These three managers share the same "install -y" syntax.
        sudo "$pkg_mgr" install -y cuda cudnn || true
        ;;
      *)
        log "Unknown package manager; cannot install CUDA automatically."
        ;;
    esac
    set -e
  else
    log "sudo not found; skipping CUDA runtime install attempt."
  fi

  # Re-check after the install attempt.
  has_libcublas12 && return 0
  echo "CUDA runtime (libcublas.so.12) not found after attempted install. Please install CUDA 12 toolkit/runtime and re-run." >&2
  exit 6
}

# Ensure the system tools used by this script are present: ffmpeg, espeak-ng
# and, when FW_DIARIZE=1, the libsndfile shared library.
# Globals:  OFFLINE (read), FW_DIARIZE (read, optional)
# Offline:  exits 5 when anything is missing.
# Online:   attempts a best-effort install through the detected package
#           manager; install failures are logged and never abort the script.
# Returns:  0 unless it exits.
install_system_deps() {
  have_cmd() { command -v "$1" > /dev/null 2>&1; }

  local want_sndfile=0
  if [[ ${FW_DIARIZE:-} == "1" ]]; then
    want_sndfile=1
  fi

  local need_ffmpeg=0 need_espeak=0 need_libsndfile=0
  local -a missing=()
  if ! have_cmd ffmpeg; then
    need_ffmpeg=1
    missing+=(ffmpeg)
  fi
  if ! have_cmd espeak-ng; then
    need_espeak=1
    missing+=(espeak-ng)
  fi

  if [[ $want_sndfile -eq 1 ]]; then
    # Heuristic: look for any libsndfile shared object. Runtime packages
    # ship only the versioned name (libsndfile.so.1); the bare .so symlink
    # comes with the -dev package, so match both.
    local lib_dir lib_file sndfile_found=0
    for lib_dir in /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu /usr/lib /usr/lib64; do
      for lib_file in "$lib_dir"/libsndfile.so*; do
        if [[ -e $lib_file ]]; then
          sndfile_found=1
          break 2
        fi
      done
    done
    if [[ $sndfile_found -eq 0 ]]; then
      need_libsndfile=1
      missing+=(libsndfile)
    fi
  fi

  if [[ ${#missing[@]} -eq 0 ]]; then
    if [[ $want_sndfile -eq 1 ]]; then
      log "System deps present: ffmpeg, espeak-ng, libsndfile"
    else
      log "System deps present: ffmpeg, espeak-ng"
    fi
    return 0
  fi

  # Human-readable, comma-separated list of what is missing.
  local missing_list="" item
  for item in "${missing[@]}"; do
    missing_list+="${missing_list:+, }${item}"
  done

  if [[ $OFFLINE -eq 1 ]]; then
    echo "Missing system dependencies ($missing_list) but running in offline mode. Install them or rerun with --online." >&2
    exit 5
  fi

  local mgr
  mgr="$(detect_pkg_mgr)"
  log "Detected package manager: $mgr (installing missing: $missing_list)"

  if ! command -v sudo > /dev/null 2>&1; then
    log "sudo not found; skipping system package installation attempt."
    return 0
  fi

  # Per-manager package names: Python venv/pip support plus libsndfile.
  local -a pkgs=()
  local sndfile_pkg=""
  case "$mgr" in
    apt)
      pkgs=(python3-venv python3-pip)
      ;;
    dnf | yum)
      # There is no python3-venv RPM (the venv module is part of python3).
      # Requesting an unknown package would fail the whole transaction.
      pkgs=(python3 python3-pip)
      sndfile_pkg=libsndfile
      ;;
    pacman)
      pkgs=(python-virtualenv python-pip)
      sndfile_pkg=libsndfile
      ;;
    zypper)
      pkgs=(python311-virtualenv python311-pip)
      sndfile_pkg=libsndfile1
      ;;
    *)
      log "Unknown package manager; please ensure ffmpeg and espeak-ng are installed."
      return 0
      ;;
  esac
  if [[ $need_ffmpeg -eq 1 ]]; then
    pkgs+=(ffmpeg)
  fi
  if [[ $need_espeak -eq 1 ]]; then
    pkgs+=(espeak-ng)
  fi
  if [[ $need_libsndfile -eq 1 && -n $sndfile_pkg ]]; then
    pkgs+=("$sndfile_pkg")
  fi

  # Best-effort: every install command has its own failure handler, so a
  # failed install is logged and the script carries on.
  case "$mgr" in
    apt)
      sudo apt-get update -y || log "apt-get update failed; continuing"
      if [[ $need_libsndfile -eq 1 ]]; then
        # Installed separately so that a wrong package name cannot abort
        # the main install below; try both names across releases.
        sudo apt-get install -y libsndfile1 ||
          sudo apt-get install -y libsndfile2 ||
          log "apt-get install of libsndfile failed; continuing"
      fi
      sudo apt-get install -y "${pkgs[@]}" || log "apt-get install failed; continuing"
      ;;
    pacman)
      sudo pacman -Sy --noconfirm "${pkgs[@]}" || log "pacman install failed; continuing"
      ;;
    dnf | yum | zypper)
      sudo "$mgr" install -y "${pkgs[@]}" || log "$mgr install failed; continuing"
      ;;
  esac
  return 0
}

# Create the Python virtualenv if needed and activate it in this shell.
# Globals:  VENV_DIR (read), OFFLINE (read)
# A venv counts as usable only when its activate script exists. A directory
# left behind by an interrupted or failed "python3 -m venv" is rebuilt
# instead of breaking every later run at the "source" step.
# When online, pip, wheel and setuptools are upgraded inside the venv.
setup_venv() {
  if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
    if [[ -d $VENV_DIR ]]; then
      log "Venv at $VENV_DIR is incomplete (no bin/activate); recreating"
    else
      log "Creating venv at $VENV_DIR"
    fi
    python3 -m venv "$VENV_DIR" || {
      echo "Failed to create venv at $VENV_DIR (is the python3 venv module installed?)" >&2
      exit 1
    }
  fi
  # shellcheck disable=SC1091
  source "$VENV_DIR/bin/activate"
  if [[ $OFFLINE -eq 0 ]]; then
    python -m pip install --upgrade pip wheel setuptools
  fi
}

# Install (online) or verify (offline) the Python packages for transcription.
# Arguments: $1 - 1 when an NVIDIA GPU was detected (default 0); selects the
#                 CUDA 12 CTranslate2 wheel and NVIDIA runtime packages.
# Globals:   OFFLINE, PY_HELPERS, VENV_DIR, FW_DIARIZE (all read);
#            exports PIP_DISABLE_PIP_VERSION_CHECK and PIP_DEFAULT_TIMEOUT.
# Offline:   nothing is installed; exits 7 if the faster-whisper check
#            fails. The diarization check is advisory only.
install_python_deps() {
  local gpu_present="${1:-0}"
  log "Installing faster-whisper and dependencies"
  export PIP_DISABLE_PIP_VERSION_CHECK=1
  export PIP_DEFAULT_TIMEOUT=${PIP_DEFAULT_TIMEOUT:-20}

  local diarize_requested=0
  if [[ ${FW_DIARIZE:-} == "1" ]]; then
    diarize_requested=1
  fi

  if [[ $OFFLINE -eq 1 ]]; then
    # Offline: verify modules only.
    python "$PY_HELPERS" check-faster-whisper || exit 7
    if [[ $diarize_requested -eq 1 ]]; then
      python "$PY_HELPERS" check-diarization || true
    fi
    return 0
  fi

  # Shared pip invocation; callers append package names and extra flags.
  _pip_upgrade() {
    python -m pip install --progress-bar on --retries 1 --upgrade "$@"
  }

  if [[ $gpu_present -eq 1 ]]; then
    # Only fetch the CUDA build of CTranslate2 when the helper's
    # check-ctranslate2 probe fails.
    if ! "$VENV_DIR/bin/python" "$PY_HELPERS" check-ctranslate2 2> /dev/null; then
      log "Installing CUDA-enabled CTranslate2 (cu12 wheel)"
      _pip_upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 ||
        log "Warning: could not reach cu12 wheel index; will proceed with available ctranslate2"
    fi
    # CUDA 12 runtime libraries installed into the venv itself.
    _pip_upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 ||
      log "Warning: failed to install NVIDIA cu12 runtime libs via pip"
  fi

  # Mandatory packages: a failure here is not tolerated.
  _pip_upgrade faster-whisper ffmpeg-python

  if [[ $diarize_requested -eq 1 ]]; then
    # Diarization extras are best-effort.
    _pip_upgrade soundfile speechbrain ||
      log "Warning: failed to install soundfile/speechbrain"
    # CPU-only torch/torchaudio, force-reinstalled to avoid mismatched
    # CUDA builds.
    _pip_upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio ||
      log "Warning: failed to install torch/torchaudio CPU wheels"
  fi

  python "$PY_HELPERS" deps-installed
}

# Abort with exit code 3 unless the Python runner script is a regular file.
# Globals: PY_RUNNER (read)
ensure_runner() {
  [[ -f $PY_RUNNER ]] && return 0
  echo "Runner not found: $PY_RUNNER" >&2
  exit 3
}

# Produce a short test WAV at $PROJECT_DIR/test_fw.wav and print its path.
# Generators are tried in order until the file is non-empty: espeak-ng,
# espeak, the Python helper's generate-wav command, then an ffmpeg sine tone.
# Globals: PROJECT_DIR (read), PY_HELPERS (read)
# Outputs: the WAV path on stdout (always, even if every generator failed);
#          progress messages go to stderr so they do not pollute the result.
generate_test_audio() {
  local wav_path="${PROJECT_DIR}/test_fw.wav"
  local phrase="This is a quick test of faster whisper transcription."

  # Preferred: speech synthesis via espeak-ng.
  if command -v espeak-ng > /dev/null 2>&1; then
    log "Generating test audio via espeak-ng -> $wav_path" >&2
    espeak-ng -w "$wav_path" "$phrase" > /dev/null 2>&1 || true
  fi

  # Second choice: classic espeak, only if nothing usable exists yet.
  if [[ ! -s $wav_path ]]; then
    if command -v espeak > /dev/null 2>&1; then
      log "espeak-ng unavailable or failed; trying espeak -> $wav_path" >&2
      espeak -w "$wav_path" "$phrase" > /dev/null 2>&1 || true
    fi
  fi

  # Third choice: tone generated by the Python helper.
  if [[ ! -s $wav_path ]]; then
    log "Generating 3s 1kHz WAV via Python stdlib -> $wav_path" >&2
    python3 "$PY_HELPERS" generate-wav --file "$wav_path" || true
  fi

  # Last resort: tone generated by ffmpeg.
  if [[ ! -s $wav_path ]]; then
    log "Creating a 3s sine tone WAV via ffmpeg -> $wav_path" >&2
    ffmpeg -f lavfi -i sine=frequency=1000:duration=3 -ar 16000 -ac 1 -f wav -y "$wav_path" > /dev/null 2>&1 || true
  fi

  echo "$wav_path"
}

# Download a model into MODEL_DIR for later offline use.
# Arguments: $1 - model name passed to the helper's prepare-model command
# Globals:   MODEL_DIR, VENV_DIR, PY_HELPERS (all read)
# Creates MODEL_DIR if needed and activates the venv before running the
# Python helper.
prepare_model() {
  local model_name="$1"
  local -a helper_cmd=(
    python "$PY_HELPERS" prepare-model
    --model "$model_name"
    --model-dir "$MODEL_DIR"
  )

  mkdir -p "$MODEL_DIR"
  # shellcheck disable=SC1091
  source "$VENV_DIR/bin/activate"
  log "Preparing model '$model_name' into $MODEL_DIR"
  "${helper_cmd[@]}"
}

# Entry point: parse options, prepare the environment, run the transcription.
# Arguments: the script's command-line arguments (see usage).
# Globals:   sets OFFLINE, PREPARE_MODEL, MODEL_DIR, MODEL, LANGUAGE, OUTDIR
#            and INPUT_FILE (read by the helper functions); reads
#            PROJECT_DIR, VENV_DIR, PY_RUNNER, PY_HELPERS, FW_DIARIZE and
#            FW_NUM_SPEAKERS.
# Exit codes used here: 0 after -h or --prepare-model, 2 for bad options or
#            a missing input file, 4 when no test audio could be generated,
#            6 for CUDA failures. Helper functions may exit with other codes.
main() {
  # Defaults
  OFFLINE=1
  PREPARE_MODEL=""
  MODEL_DIR="$PROJECT_DIR/models"
  MODEL="large-v3"
  LANGUAGE=""
  OUTDIR=""
  INPUT_FILE=""

  # Parse args. On a parse error the usage text goes to stderr.
  local parsed
  parsed=$(getopt -o m:l:o:h -l online,prepare-model:,model-dir: -- "$@") || {
    usage >&2
    exit 2
  }
  eval set -- "$parsed"
  while true; do
    case "$1" in
      -m)
        MODEL="$2"
        shift 2
        ;;
      -l)
        LANGUAGE="$2"
        shift 2
        ;;
      -o)
        OUTDIR="$2"
        shift 2
        ;;
      -h)
        usage
        exit 0
        ;;
      --online)
        OFFLINE=0
        shift
        ;;
      --prepare-model)
        # Downloading a model needs the network, so this implies --online.
        PREPARE_MODEL="$2"
        OFFLINE=0
        shift 2
        ;;
      --model-dir)
        MODEL_DIR="$2"
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *) break ;;
    esac
  done
  INPUT_FILE="${1:-}"

  if [[ $OFFLINE -eq 1 ]]; then
    export HF_HUB_OFFLINE=1
    export TRANSFORMERS_OFFLINE=1
  fi

  install_system_deps
  setup_venv

  # If asked to prepare a model, do that and exit. OFFLINE is already 0
  # here because --prepare-model forces it.
  if [[ -n $PREPARE_MODEL ]]; then
    install_python_deps 0
    prepare_model "$PREPARE_MODEL"
    log "Model '$PREPARE_MODEL' downloaded to $MODEL_DIR"
    exit 0
  fi

  # Detect NVIDIA GPU and enforce CUDA if present
  local has_nvidia=0
  if command -v nvidia-smi > /dev/null 2>&1 && nvidia-smi -L > /dev/null 2>&1; then
    has_nvidia=1
  fi
  install_python_deps "$has_nvidia"
  ensure_runner

  local input="$INPUT_FILE"
  if [[ -z $input ]]; then
    input="$(generate_test_audio)"
    if [[ ! -s $input ]]; then
      echo "Failed to generate test audio. Please provide an audio file." >&2
      exit 4
    fi
  fi

  if [[ ! -f $input ]]; then
    echo "Input file not found: $input" >&2
    exit 2
  fi

  # Offline: prefer a local model directory (MODEL itself, or MODEL under
  # MODEL_DIR); otherwise the name is passed through and resolved from the
  # cache without network. Resolved before the argument list is built so
  # that it applies to both the CPU and the GPU path and no option is lost.
  local model_ref="$MODEL"
  if [[ $OFFLINE -eq 1 && ! -d $MODEL && -d "$MODEL_DIR/$MODEL" ]]; then
    model_ref="$MODEL_DIR/$MODEL"
  fi

  local args=("$input" "--model" "$model_ref")
  if [[ -n $LANGUAGE ]]; then
    args+=("--language" "$LANGUAGE")
  fi
  if [[ -n $OUTDIR ]]; then
    args+=("--outdir" "$OUTDIR")
  fi

  # Pass diarization via env if requested
  if [[ ${FW_DIARIZE:-} == "1" ]]; then
    args+=("--diarize")
    if [[ -n ${FW_NUM_SPEAKERS:-} ]]; then
      args+=("--num-speakers" "${FW_NUM_SPEAKERS}")
    fi
  fi

  # Activate the venv once, before any PATH change below: sourcing the venv
  # activate script again first restores the PATH saved at the previous
  # activation, which would discard entries appended in between.
  # shellcheck disable=SC1091
  source "$VENV_DIR/bin/activate"

  if [[ $has_nvidia -eq 1 ]]; then
    ensure_cuda_runtime
    # Export common CUDA paths in case the env lacks them
    export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"

    # Build the library search path from non-empty entries only: an empty
    # LD_LIBRARY_PATH element makes the loader search the current directory.
    local -a lib_dirs=()
    if [[ -n ${LD_LIBRARY_PATH:-} ]]; then
      lib_dirs+=("$LD_LIBRARY_PATH")
    fi
    lib_dirs+=(
      "${CUDA_HOME}/lib64"
      /usr/lib/x86_64-linux-gnu
      /opt/cuda/lib64
      /opt/cuda/targets/x86_64-linux/lib
    )
    # Include possible venv-provided CUDA libs (pip nvidia-* packages).
    local pyver=""
    if [[ -x "$VENV_DIR/bin/python" ]]; then
      pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2> /dev/null || true)"
      if [[ -n $pyver ]]; then
        local site_nvidia="$VENV_DIR/lib/python$pyver/site-packages/nvidia"
        lib_dirs+=(
          "$site_nvidia/cublas/lib"
          "$site_nvidia/cudnn/lib"
          "$site_nvidia/cuda_runtime/lib"
        )
      fi
    fi
    local joined_lib_path
    joined_lib_path="$(
      IFS=:
      printf '%s' "${lib_dirs[*]}"
    )"
    export LD_LIBRARY_PATH="$joined_lib_path"
    export PATH="${PATH}:${CUDA_HOME}/bin"

    python "$PY_HELPERS" test-cuda || {
      echo "CUDA environment check failed. Aborting as requested." >&2
      exit 6
    }
    args+=("--device" "cuda")
  fi

  log "Transcribing: $input"
  if [[ $has_nvidia -eq 1 ]]; then
    if ! python "$PY_RUNNER" "${args[@]}"; then
      echo "CUDA execution requested due to detected NVIDIA GPU, but it failed. Aborting as requested (no CPU fallback)." >&2
      exit 6
    fi
  else
    python "$PY_RUNNER" "${args[@]}"
  fi
}

# Script entry point: forward all command-line arguments to main.
main "$@"