diff --git a/scripts/misc/testsAndMisc-bash/tools/__pycache__/transcribe_fw.cpython-313.pyc b/scripts/misc/testsAndMisc-bash/tools/__pycache__/transcribe_fw.cpython-313.pyc new file mode 100644 index 0000000..7f94d78 Binary files /dev/null and b/scripts/misc/testsAndMisc-bash/tools/__pycache__/transcribe_fw.cpython-313.pyc differ diff --git a/scripts/misc/testsAndMisc-bash/tools/__pycache__/transcribe_helpers.cpython-313.pyc b/scripts/misc/testsAndMisc-bash/tools/__pycache__/transcribe_helpers.cpython-313.pyc new file mode 100644 index 0000000..a10cc41 Binary files /dev/null and b/scripts/misc/testsAndMisc-bash/tools/__pycache__/transcribe_helpers.cpython-313.pyc differ diff --git a/scripts/misc/testsAndMisc-bash/tools/transcribe_fw.py b/scripts/misc/testsAndMisc-bash/tools/transcribe_fw.py index 48eff05..72fdc4f 100644 --- a/scripts/misc/testsAndMisc-bash/tools/transcribe_fw.py +++ b/scripts/misc/testsAndMisc-bash/tools/transcribe_fw.py @@ -7,6 +7,121 @@ import sys import time from datetime import timedelta from typing import List, Optional + + +def format_bytes(size: int) -> str: + """Format bytes as human-readable string.""" + for unit in ['B', 'KB', 'MB', 'GB']: + if size < 1024: + return f"{size:.1f}{unit}" + size /= 1024 + return f"{size:.1f}TB" + + +def download_model_with_progress(model_name: str) -> str: + """Download model files from HuggingFace with a visible progress bar. + + Returns the local path to the downloaded model. + """ + try: + from huggingface_hub import snapshot_download, hf_hub_download + from huggingface_hub.utils import EntryNotFoundError + except ImportError: + print("[WARN] huggingface_hub not available, falling back to default download", file=sys.stderr) + return model_name + + # Map common model names to HF repo IDs + model_map = { + "tiny": "Systran/faster-whisper-tiny", + "tiny.en": "Systran/faster-whisper-tiny.en", + "base": "Systran/faster-whisper-base", + "base.en": "Systran/faster-whisper-base.en", + "small": "Systran/faster-whisper-small", + "small.en": "Systran/faster-whisper-small.en", + "medium": "Systran/faster-whisper-medium", + "medium.en": "Systran/faster-whisper-medium.en", + "large-v1": "Systran/faster-whisper-large-v1", + "large-v2": "Systran/faster-whisper-large-v2", + "large-v3": "Systran/faster-whisper-large-v3", + "large": "Systran/faster-whisper-large-v3", + "distil-large-v2": "Systran/faster-distil-whisper-large-v2", + "distil-large-v3": "Systran/faster-distil-whisper-large-v3", + "distil-medium.en": "Systran/faster-distil-whisper-medium.en", + "distil-small.en": "Systran/faster-distil-whisper-small.en", + } + + repo_id = model_map.get(model_name, model_name) + + # Check if it looks like a repo ID + if "/" not in repo_id and model_name not in model_map: + # Assume it's a Systran model + repo_id = f"Systran/faster-whisper-{model_name}" + + print(f"[INFO] Checking model: {repo_id}", flush=True) + + # Files we need to download (model.bin is the large one) + required_files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.txt"] + + try: + # Use snapshot_download which handles caching and shows what's happening + # First, let's check if model.bin needs downloading by checking cache + from huggingface_hub import try_to_load_from_cache, HfFileSystem + + cache_path = try_to_load_from_cache(repo_id, "model.bin") + if cache_path is not None: + print(f"[INFO] Model already cached, loading from: {os.path.dirname(cache_path)}", flush=True) + # Return the directory containing the cached files + return os.path.dirname(cache_path) + + # Model not cached, need to download + print(f"[INFO] Downloading model files from {repo_id}...", flush=True) + print("[INFO] This may take several minutes for large models (~3GB for large-v3)", flush=True) + + # Get file sizes to show progress + try: + fs = HfFileSystem() + files_info = fs.ls(repo_id, detail=True) + total_size = sum(f.get('size', 0) for f in files_info if f.get('name', '').split('/')[-1] in required_files) + print(f"[INFO] Total download size: ~{format_bytes(total_size)}", flush=True) + except Exception: + pass # Size info is optional + + # Download with progress + downloaded = 0 + start_time = time.time() + + for filename in required_files: + file_start = time.time() + print(f"[DOWNLOAD] {filename}...", end=" ", flush=True) + try: + local_path = hf_hub_download( + repo_id=repo_id, + filename=filename, + resume_download=True, + ) + elapsed = time.time() - file_start + file_size = os.path.getsize(local_path) if os.path.exists(local_path) else 0 + print(f"done ({format_bytes(file_size)}, {elapsed:.1f}s)", flush=True) + downloaded += 1 + + # Return directory on first successful download + if downloaded == 1: + model_dir = os.path.dirname(local_path) + except EntryNotFoundError: + print("not found (optional)", flush=True) + except Exception as e: + print(f"error: {e}", flush=True) + + total_time = time.time() - start_time + print(f"[INFO] Download complete in {total_time:.1f}s", flush=True) + + return model_dir + + except Exception as e: + print(f"[WARN] Custom download failed ({e}), falling back to default", file=sys.stderr) + return model_name + + def format_timestamp(seconds: float) -> str: td = timedelta(seconds=seconds) # Ensure SRT format HH:MM:SS,mmm @@ -324,7 +439,21 @@ def main(): compute_type = "float16" if device == "cuda" else "float32" print(f"[INFO] Loading model='{args.model}', device='{device}', compute_type='{compute_type}'") - model = WhisperModel(args.model, device=device, compute_type=compute_type) + + # Pre-download model files with explicit progress if not already cached + model_path = args.model + if not os.path.isdir(args.model): # Not a local path, need to download from HF + model_path = download_model_with_progress(args.model) + + # Show CTranslate2 conversion progress + import logging + logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') + ct2_logger = logging.getLogger("faster_whisper") + ct2_logger.setLevel(logging.INFO) + + print("[INFO] Initializing model...", flush=True) + model = WhisperModel(model_path, device=device, compute_type=compute_type) + print("[INFO] Model loaded successfully.", flush=True) # Transcription with live progress total_duration = get_media_duration(inp) diff --git a/scripts/misc/testsAndMisc-bash/tools/transcribe_helpers.py b/scripts/misc/testsAndMisc-bash/tools/transcribe_helpers.py new file mode 100644 index 0000000..a39756b --- /dev/null +++ b/scripts/misc/testsAndMisc-bash/tools/transcribe_helpers.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +"""Helper utilities for transcribe.sh - replaces inline Python snippets.""" + +import argparse +import math +import os +import sys +import array +import wave + + +def get_python_version() -> str: + """Return Python major.minor version string.""" + return f"{sys.version_info.major}.{sys.version_info.minor}" + + +def check_faster_whisper() -> bool: + """Check if faster_whisper is importable. Exit 7 if not.""" + try: + import faster_whisper # noqa: F401 + return True + except ImportError: + return False + + +def check_diarization_deps() -> bool: + """Check if diarization dependencies are available. Returns False with warning if missing.""" + try: + import soundfile # noqa: F401 + import speechbrain # noqa: F401 + import torch # noqa: F401 + return True + except Exception as e: + print(f"[WARN] Diarization deps missing offline ({e}); speaker labels will be skipped.") + return False + + +def check_ctranslate2() -> bool: + """Check if ctranslate2 is importable.""" + try: + import ctranslate2 # noqa: F401 + return True + except ImportError: + return False + + +def print_deps_installed(): + """Print confirmation that Python dependencies are installed.""" + print(f"[PY] Python {sys.version.split()[0]} dependencies installed.") + + +def generate_sine_wav(outfile: str, frequency: float = 1000.0, duration: int = 3, + sample_rate: int = 16000, amplitude: float = 0.3) -> bool: + """Generate a sine wave WAV file using only Python stdlib. + + Args: + outfile: Output WAV file path + frequency: Tone frequency in Hz (default: 1000) + duration: Duration in seconds (default: 3) + sample_rate: Sample rate in Hz (default: 16000) + amplitude: Amplitude 0.0-1.0 (default: 0.3) + + Returns: + True on success, False on failure + """ + try: + n_samples = sample_rate * duration + data = array.array("h", [ + int(max(-1.0, min(1.0, amplitude * math.sin(2 * math.pi * frequency * (i / sample_rate)))) * 32767) + for i in range(n_samples) + ]) + with wave.open(outfile, "w") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(sample_rate) + wf.writeframes(data.tobytes()) + return True + except Exception as e: + print(f"[ERROR] Failed to generate WAV: {e}", file=sys.stderr) + return False + + +def prepare_model(model_name: str, model_dir: str) -> bool: + """Download a whisper model for offline use. + + Args: + model_name: Model name (tiny, base, small, medium, large-v3, etc.) + model_dir: Directory to store the model + + Returns: + True on success, False on failure + """ + try: + from faster_whisper import WhisperModel + + # Enable HuggingFace Hub progress bars for model download + try: + from huggingface_hub import logging as hf_logging + hf_logging.set_verbosity_info() + import huggingface_hub + huggingface_hub.constants.HF_HUB_DISABLE_PROGRESS_BARS = False + os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0" + except ImportError: + pass + + print(f"[PY] Preparing model '{model_name}' into {model_dir}") + print("[INFO] Downloading model files (progress bar should appear below)...", flush=True) + WhisperModel(model_name, device="cpu", compute_type="int8", download_root=model_dir) + print("[PY] Model prepared.") + return True + except Exception as e: + print(f"[ERROR] Failed to prepare model: {e}", file=sys.stderr) + return False + + +def test_cuda() -> bool: + """Test CUDA initialization with faster-whisper. + + Returns: + True if CUDA works, False otherwise + """ + try: + from faster_whisper import WhisperModel + WhisperModel("tiny", device="cuda", compute_type="float16") + print("[PY] CUDA test init succeeded.") + return True + except Exception as e: + print(f"[ERROR] CUDA test failed: {e}", file=sys.stderr) + return False + + +def main(): + parser = argparse.ArgumentParser( + description="Helper utilities for transcribe.sh", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Commands: + python-version Print Python major.minor version + check-faster-whisper Check if faster_whisper is installed (exit 7 if not) + check-diarization Check diarization deps (warn if missing) + check-ctranslate2 Check if ctranslate2 is installed (exit 1 if not) + deps-installed Print deps installed confirmation message + generate-wav FILE Generate a 3s 1kHz sine wave WAV file + prepare-model Download model for offline use (requires --model and --model-dir) + test-cuda Test CUDA initialization +""") + parser.add_argument("command", choices=[ + "python-version", + "check-faster-whisper", + "check-diarization", + "check-ctranslate2", + "deps-installed", + "generate-wav", + "prepare-model", + "test-cuda", + ], help="Command to run") + parser.add_argument("--file", help="Output file path (for generate-wav)") + parser.add_argument("--model", help="Model name (for prepare-model)") + parser.add_argument("--model-dir", help="Model directory (for prepare-model)") + + args = parser.parse_args() + + if args.command == "python-version": + print(get_python_version()) + elif args.command == "check-faster-whisper": + if not check_faster_whisper(): + print("Python dependency 'faster_whisper' not found in offline mode. Run with --online to install.", file=sys.stderr) + sys.exit(7) + elif args.command == "check-diarization": + check_diarization_deps() + elif args.command == "check-ctranslate2": + if not check_ctranslate2(): + sys.exit(1) + elif args.command == "deps-installed": + print_deps_installed() + elif args.command == "generate-wav": + if not args.file: + print("--file is required for generate-wav", file=sys.stderr) + sys.exit(2) + if not generate_sine_wav(args.file): + sys.exit(1) + elif args.command == "prepare-model": + if not args.model or not args.model_dir: + print("--model and --model-dir are required for prepare-model", file=sys.stderr) + sys.exit(2) + if not prepare_model(args.model, args.model_dir): + sys.exit(1) + elif args.command == "test-cuda": + if not test_cuda(): + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/misc/testsAndMisc-bash/transcribe.sh b/scripts/misc/testsAndMisc-bash/transcribe.sh index f7e0049..42e0465 100755 --- a/scripts/misc/testsAndMisc-bash/transcribe.sh +++ b/scripts/misc/testsAndMisc-bash/transcribe.sh @@ -11,6 +11,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_DIR="$SCRIPT_DIR" TOOLS_DIR="$PROJECT_DIR/tools" PY_RUNNER="$TOOLS_DIR/transcribe_fw.py" +PY_HELPERS="$TOOLS_DIR/transcribe_helpers.py" VENV_DIR="$PROJECT_DIR/.venv" usage() { @@ -73,7 +74,7 @@ has_libcublas12() { # venv-provided NVIDIA CUDA libs if [[ -x "$VENV_DIR/bin/python" ]]; then local pyver - pyver="$("$VENV_DIR"/bin/python -c 'import sys;print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || true)" + pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2>/dev/null || true)" if [[ -n $pyver ]]; then for d in "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib" \ "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib" \ @@ -234,46 +235,37 @@ install_python_deps() { export PIP_DEFAULT_TIMEOUT=${PIP_DEFAULT_TIMEOUT:-20} if [[ $OFFLINE -eq 1 ]]; then # Offline: do not install, just verify modules - if ! python -c 'import faster_whisper' >/dev/null 2>&1; then - echo "Python dependency 'faster_whisper' not found in offline mode. Run with --online to install." >&2 + if ! python "$PY_HELPERS" check-faster-whisper; then exit 7 fi # If diarization requested offline, check for its deps too (warn-only) if [[ ${FW_DIARIZE:-} == "1" ]]; then - python - <<'PY' || true -try: - import soundfile, speechbrain, torch # noqa: F401 -except Exception as e: - print(f"[WARN] Diarization deps missing offline ({e}); speaker labels will be skipped.") -PY + python "$PY_HELPERS" check-diarization || true fi return 0 fi if [[ $has_nvidia_flag -eq 1 ]]; then - # If ctranslate2 is not installed, attempt CUDA-enabled wheel (quiet, with fallback) - if ! "$VENV_DIR/bin/python" -c 'import ctranslate2' >/dev/null 2>&1; then + # If ctranslate2 is not installed, attempt CUDA-enabled wheel (with fallback) + if ! "$VENV_DIR/bin/python" "$PY_HELPERS" check-ctranslate2 2>/dev/null; then log "Installing CUDA-enabled CTranslate2 (cu12 wheel)" - python -m pip install -q --retries 1 --upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 || + python -m pip install --progress-bar on --retries 1 --upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 || log "Warning: could not reach cu12 wheel index; will proceed with available ctranslate2" fi # Ensure NVIDIA CUDA 12 runtime libs are available inside the venv - python -m pip install -q --retries 1 --upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 || + python -m pip install --progress-bar on --retries 1 --upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 || log "Warning: failed to install NVIDIA cu12 runtime libs via pip" fi - python -m pip install -q --retries 1 --upgrade faster-whisper ffmpeg-python + python -m pip install --progress-bar on --retries 1 --upgrade faster-whisper ffmpeg-python # If diarization requested and online, install its Python deps best-effort if [[ ${FW_DIARIZE:-} == "1" ]]; then - python -m pip install -q --retries 1 --upgrade soundfile speechbrain || + python -m pip install --progress-bar on --retries 1 --upgrade soundfile speechbrain || log "Warning: failed to install soundfile/speechbrain" # Torch and torchaudio CPU wheels (force to avoid mismatched CUDA builds) - python -m pip install -q --retries 1 --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio || + python -m pip install --progress-bar on --retries 1 --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio || log "Warning: failed to install torch/torchaudio CPU wheels" fi - python - <<'PY' -import sys -print(f"[PY] Python {sys.version.split()[0]} dependencies installed.") -PY + python "$PY_HELPERS" deps-installed } ensure_runner() { @@ -298,7 +290,7 @@ generate_test_audio() { # Fallback: generate tone via Python stdlib (no external deps) if [[ ! -s $tmpwav ]]; then log "Generating 3s 1kHz WAV via Python stdlib -> $tmpwav" >&2 - python3 -c 'import sys,wave,math,array;outfile=sys.argv[1];fr=16000;dur=3;freq=1000.0;ampl=0.3;n=fr*dur;data=array.array("h",[int(max(-1.0,min(1.0,ampl*math.sin(2*math.pi*freq*(i/fr))))*32767) for i in range(n)]);wf=wave.open(outfile,"w");wf.setnchannels(1);wf.setsampwidth(2);wf.setframerate(fr);wf.writeframes(data.tobytes());wf.close()' "$tmpwav" || true + python3 "$PY_HELPERS" generate-wav --file "$tmpwav" || true fi # Final fallback: tone via ffmpeg if [[ ! -s $tmpwav ]]; then @@ -315,15 +307,7 @@ prepare_model() { # shellcheck disable=SC1091 source "$VENV_DIR/bin/activate" log "Preparing model '$name' into $MODEL_DIR" - python - </dev/null || true)" + pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2>/dev/null || true)" if [[ -n $pyver ]]; then venv_cuda_paths="$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib:$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib:$VENV_DIR/lib/python$pyver/site-packages/nvidia/cuda_runtime/lib" fi @@ -454,7 +436,7 @@ main() { export PATH="${PATH}:${CUDA_HOME}/bin" # shellcheck disable=SC1091 source "$VENV_DIR/bin/activate" - python -c 'from faster_whisper import WhisperModel; WhisperModel("tiny", device="cuda", compute_type="float16"); print("[PY] CUDA test init succeeded.")' || { + python "$PY_HELPERS" test-cuda || { echo "CUDA environment check failed. Aborting as requested." >&2 exit 6 }