feat: more descriptive transcription installation

2026-07-04 13:23:15 +02:00 · 2025-12-20 21:49:52 +01:00 · 2025-12-20 21:49:52 +01:00 · 16c2208ac8
commit 16c2208ac8
parent 6a26422737
5 changed files with 340 additions and 35 deletions
--- a/scripts/misc/testsAndMisc-bash/tools/pycache/transcribe_fw.cpython-313.pyc
+++ b/scripts/misc/testsAndMisc-bash/tools/pycache/transcribe_fw.cpython-313.pyc
--- a/scripts/misc/testsAndMisc-bash/tools/pycache/transcribe_helpers.cpython-313.pyc
+++ b/scripts/misc/testsAndMisc-bash/tools/pycache/transcribe_helpers.cpython-313.pyc
--- a/scripts/misc/testsAndMisc-bash/tools/transcribe_fw.py
+++ b/scripts/misc/testsAndMisc-bash/tools/transcribe_fw.py
@ -7,6 +7,121 @@ import sys
 import time
 from datetime import timedelta
 from typing import List, Optional
+
+
+def format_bytes(size: int) -> str:
+    """Format bytes as human-readable string."""
+    for unit in ['B', 'KB', 'MB', 'GB']:
+        if size < 1024:
+            return f"{size:.1f}{unit}"
+        size /= 1024
+    return f"{size:.1f}TB"
+
+
+def download_model_with_progress(model_name: str) -> str:
+    """Download model files from HuggingFace with a visible progress bar.
+    
+    Returns the local path to the downloaded model.
+    """
+    try:
+        from huggingface_hub import snapshot_download, hf_hub_download
+        from huggingface_hub.utils import EntryNotFoundError
+    except ImportError:
+        print("[WARN] huggingface_hub not available, falling back to default download", file=sys.stderr)
+        return model_name
+    
+    # Map common model names to HF repo IDs
+    model_map = {
+        "tiny": "Systran/faster-whisper-tiny",
+        "tiny.en": "Systran/faster-whisper-tiny.en",
+        "base": "Systran/faster-whisper-base",
+        "base.en": "Systran/faster-whisper-base.en",
+        "small": "Systran/faster-whisper-small",
+        "small.en": "Systran/faster-whisper-small.en",
+        "medium": "Systran/faster-whisper-medium",
+        "medium.en": "Systran/faster-whisper-medium.en",
+        "large-v1": "Systran/faster-whisper-large-v1",
+        "large-v2": "Systran/faster-whisper-large-v2",
+        "large-v3": "Systran/faster-whisper-large-v3",
+        "large": "Systran/faster-whisper-large-v3",
+        "distil-large-v2": "Systran/faster-distil-whisper-large-v2",
+        "distil-large-v3": "Systran/faster-distil-whisper-large-v3",
+        "distil-medium.en": "Systran/faster-distil-whisper-medium.en",
+        "distil-small.en": "Systran/faster-distil-whisper-small.en",
+    }
+    
+    repo_id = model_map.get(model_name, model_name)
+    
+    # Check if it looks like a repo ID
+    if "/" not in repo_id and model_name not in model_map:
+        # Assume it's a Systran model
+        repo_id = f"Systran/faster-whisper-{model_name}"
+    
+    print(f"[INFO] Checking model: {repo_id}", flush=True)
+    
+    # Files we need to download (model.bin is the large one)
+    required_files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.txt"]
+    
+    try:
+        # Look up model.bin in the local HuggingFace cache first; if it is already
+        # cached, return its directory and skip the per-file download below
+        from huggingface_hub import try_to_load_from_cache, HfFileSystem
+        
+        cache_path = try_to_load_from_cache(repo_id, "model.bin")
+        if cache_path is not None:
+            print(f"[INFO] Model already cached, loading from: {os.path.dirname(cache_path)}", flush=True)
+            # Return the directory containing the cached files
+            return os.path.dirname(cache_path)
+        
+        # Model not cached, need to download
+        print(f"[INFO] Downloading model files from {repo_id}...", flush=True)
+        print("[INFO] This may take several minutes for large models (~3GB for large-v3)", flush=True)
+        
+        # Get file sizes to show progress
+        try:
+            fs = HfFileSystem()
+            files_info = fs.ls(repo_id, detail=True)
+            total_size = sum(f.get('size', 0) for f in files_info if f.get('name', '').split('/')[-1] in required_files)
+            print(f"[INFO] Total download size: ~{format_bytes(total_size)}", flush=True)
+        except Exception:
+            pass  # Size info is optional
+        
+        # Download with progress
+        downloaded = 0
+        start_time = time.time()
+        
+        for filename in required_files:
+            file_start = time.time()
+            print(f"[DOWNLOAD] {filename}...", end=" ", flush=True)
+            try:
+                local_path = hf_hub_download(
+                    repo_id=repo_id,
+                    filename=filename,
+                    resume_download=True,
+                )
+                elapsed = time.time() - file_start
+                file_size = os.path.getsize(local_path) if os.path.exists(local_path) else 0
+                print(f"done ({format_bytes(file_size)}, {elapsed:.1f}s)", flush=True)
+                downloaded += 1
+                
+                # Remember the directory of the first successfully downloaded file
+                if downloaded == 1:
+                    model_dir = os.path.dirname(local_path)
+            except EntryNotFoundError:
+                print("not found (optional)", flush=True)
+            except Exception as e:
+                print(f"error: {e}", flush=True)
+        
+        total_time = time.time() - start_time
+        print(f"[INFO] Download complete in {total_time:.1f}s", flush=True)
+        
+        return model_dir
+        
+    except Exception as e:
+        print(f"[WARN] Custom download failed ({e}), falling back to default", file=sys.stderr)
+        return model_name
+
+
 def format_timestamp(seconds: float) -> str:
    td = timedelta(seconds=seconds)
    # Ensure SRT format HH:MM:SS,mmm
@ -324,7 +439,21 @@ def main():
        compute_type = "float16" if device == "cuda" else "float32"

    print(f"[INFO] Loading model='{args.model}', device='{device}', compute_type='{compute_type}'")
-    model = WhisperModel(args.model, device=device, compute_type=compute_type)
+
+    # Pre-download model files with explicit progress if not already cached
+    model_path = args.model
+    if not os.path.isdir(args.model):  # Not a local path, need to download from HF
+        model_path = download_model_with_progress(args.model)
+
+    # Enable INFO-level logging for the "faster_whisper" logger while the model loads
+    import logging
+    logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
+    ct2_logger = logging.getLogger("faster_whisper")
+    ct2_logger.setLevel(logging.INFO)
+
+    print("[INFO] Initializing model...", flush=True)
+    model = WhisperModel(model_path, device=device, compute_type=compute_type)
+    print("[INFO] Model loaded successfully.", flush=True)

    # Transcription with live progress
    total_duration = get_media_duration(inp)
--- a/scripts/misc/testsAndMisc-bash/tools/transcribe_helpers.py
+++ b/scripts/misc/testsAndMisc-bash/tools/transcribe_helpers.py
@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+"""Helper utilities for transcribe.sh - replaces inline Python snippets."""
+
+import argparse
+import math
+import os
+import sys
+import array
+import wave
+
+
+def get_python_version() -> str:
+    """Return Python major.minor version string."""
+    return f"{sys.version_info.major}.{sys.version_info.minor}"
+
+
+def check_faster_whisper() -> bool:
+    """Return True if faster_whisper is importable, False otherwise (main() exits 7 on False)."""
+    try:
+        import faster_whisper  # noqa: F401
+        return True
+    except ImportError:
+        return False
+
+
+def check_diarization_deps() -> bool:
+    """Check if diarization dependencies are available. Returns False with warning if missing."""
+    try:
+        import soundfile  # noqa: F401
+        import speechbrain  # noqa: F401
+        import torch  # noqa: F401
+        return True
+    except Exception as e:
+        print(f"[WARN] Diarization deps missing offline ({e}); speaker labels will be skipped.")
+        return False
+
+
+def check_ctranslate2() -> bool:
+    """Check if ctranslate2 is importable."""
+    try:
+        import ctranslate2  # noqa: F401
+        return True
+    except ImportError:
+        return False
+
+
+def print_deps_installed():
+    """Print confirmation that Python dependencies are installed."""
+    print(f"[PY] Python {sys.version.split()[0]} dependencies installed.")
+
+
+def generate_sine_wav(outfile: str, frequency: float = 1000.0, duration: int = 3,
+                      sample_rate: int = 16000, amplitude: float = 0.3) -> bool:
+    """Generate a sine wave WAV file using only Python stdlib.
+    
+    Args:
+        outfile: Output WAV file path
+        frequency: Tone frequency in Hz (default: 1000)
+        duration: Duration in seconds (default: 3)
+        sample_rate: Sample rate in Hz (default: 16000)
+        amplitude: Amplitude 0.0-1.0 (default: 0.3)
+    
+    Returns:
+        True on success, False on failure
+    """
+    try:
+        n_samples = sample_rate * duration
+        data = array.array("h", [
+            int(max(-1.0, min(1.0, amplitude * math.sin(2 * math.pi * frequency * (i / sample_rate)))) * 32767)
+            for i in range(n_samples)
+        ])
+        with wave.open(outfile, "w") as wf:
+            wf.setnchannels(1)
+            wf.setsampwidth(2)
+            wf.setframerate(sample_rate)
+            wf.writeframes(data.tobytes())
+        return True
+    except Exception as e:
+        print(f"[ERROR] Failed to generate WAV: {e}", file=sys.stderr)
+        return False
+
+
+def prepare_model(model_name: str, model_dir: str) -> bool:
+    """Download a whisper model for offline use.
+    
+    Args:
+        model_name: Model name (tiny, base, small, medium, large-v3, etc.)
+        model_dir: Directory to store the model
+    
+    Returns:
+        True on success, False on failure
+    """
+    try:
+        from faster_whisper import WhisperModel
+        
+        # Enable HuggingFace Hub progress bars for model download
+        try:
+            from huggingface_hub import logging as hf_logging
+            hf_logging.set_verbosity_info()
+            import huggingface_hub
+            huggingface_hub.constants.HF_HUB_DISABLE_PROGRESS_BARS = False
+            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
+        except ImportError:
+            pass
+        
+        print(f"[PY] Preparing model '{model_name}' into {model_dir}")
+        print("[INFO] Downloading model files (progress bar should appear below)...", flush=True)
+        WhisperModel(model_name, device="cpu", compute_type="int8", download_root=model_dir)
+        print("[PY] Model prepared.")
+        return True
+    except Exception as e:
+        print(f"[ERROR] Failed to prepare model: {e}", file=sys.stderr)
+        return False
+
+
+def test_cuda() -> bool:
+    """Test CUDA initialization with faster-whisper.
+    
+    Returns:
+        True if CUDA works, False otherwise
+    """
+    try:
+        from faster_whisper import WhisperModel
+        WhisperModel("tiny", device="cuda", compute_type="float16")
+        print("[PY] CUDA test init succeeded.")
+        return True
+    except Exception as e:
+        print(f"[ERROR] CUDA test failed: {e}", file=sys.stderr)
+        return False
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Helper utilities for transcribe.sh",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Commands:
+  python-version       Print Python major.minor version
+  check-faster-whisper Check if faster_whisper is installed (exit 7 if not)
+  check-diarization    Check diarization deps (warn if missing)
+  check-ctranslate2    Check if ctranslate2 is installed (exit 1 if not)
+  deps-installed       Print deps installed confirmation message
+  generate-wav FILE    Generate a 3s 1kHz sine wave WAV file
+  prepare-model        Download model for offline use (requires --model and --model-dir)
+  test-cuda            Test CUDA initialization
+""")
+    parser.add_argument("command", choices=[
+        "python-version",
+        "check-faster-whisper",
+        "check-diarization",
+        "check-ctranslate2",
+        "deps-installed",
+        "generate-wav",
+        "prepare-model",
+        "test-cuda",
+    ], help="Command to run")
+    parser.add_argument("--file", help="Output file path (for generate-wav)")
+    parser.add_argument("--model", help="Model name (for prepare-model)")
+    parser.add_argument("--model-dir", help="Model directory (for prepare-model)")
+
+    args = parser.parse_args()
+
+    if args.command == "python-version":
+        print(get_python_version())
+    elif args.command == "check-faster-whisper":
+        if not check_faster_whisper():
+            print("Python dependency 'faster_whisper' not found in offline mode. Run with --online to install.", file=sys.stderr)
+            sys.exit(7)
+    elif args.command == "check-diarization":
+        check_diarization_deps()
+    elif args.command == "check-ctranslate2":
+        if not check_ctranslate2():
+            sys.exit(1)
+    elif args.command == "deps-installed":
+        print_deps_installed()
+    elif args.command == "generate-wav":
+        if not args.file:
+            print("--file is required for generate-wav", file=sys.stderr)
+            sys.exit(2)
+        if not generate_sine_wav(args.file):
+            sys.exit(1)
+    elif args.command == "prepare-model":
+        if not args.model or not args.model_dir:
+            print("--model and --model-dir are required for prepare-model", file=sys.stderr)
+            sys.exit(2)
+        if not prepare_model(args.model, args.model_dir):
+            sys.exit(1)
+    elif args.command == "test-cuda":
+        if not test_cuda():
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/misc/testsAndMisc-bash/transcribe.sh
+++ b/scripts/misc/testsAndMisc-bash/transcribe.sh
@ -11,6 +11,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PROJECT_DIR="$SCRIPT_DIR"
 TOOLS_DIR="$PROJECT_DIR/tools"
 PY_RUNNER="$TOOLS_DIR/transcribe_fw.py"
+PY_HELPERS="$TOOLS_DIR/transcribe_helpers.py"
 VENV_DIR="$PROJECT_DIR/.venv"

 usage() {
@ -73,7 +74,7 @@ has_libcublas12() {
 	# venv-provided NVIDIA CUDA libs
 	if [[ -x "$VENV_DIR/bin/python" ]]; then
 		local pyver
-		pyver="$("$VENV_DIR"/bin/python -c 'import sys;print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || true)"
+		pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2>/dev/null || true)"
 		if [[ -n $pyver ]]; then
 			for d in "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib" \
 				"$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib" \
@ -234,46 +235,37 @@ install_python_deps() {
 	export PIP_DEFAULT_TIMEOUT=${PIP_DEFAULT_TIMEOUT:-20}
 	if [[ $OFFLINE -eq 1 ]]; then
 		# Offline: do not install, just verify modules
-		if ! python -c 'import faster_whisper' >/dev/null 2>&1; then
-			echo "Python dependency 'faster_whisper' not found in offline mode. Run with --online to install." >&2
+		if ! python "$PY_HELPERS" check-faster-whisper; then
 			exit 7
 		fi
 		# If diarization requested offline, check for its deps too (warn-only)
 		if [[ ${FW_DIARIZE:-} == "1" ]]; then
-			python - <<'PY' || true
-try:
-    import soundfile, speechbrain, torch  # noqa: F401
-except Exception as e:
-    print(f"[WARN] Diarization deps missing offline ({e}); speaker labels will be skipped.")
-PY
+			python "$PY_HELPERS" check-diarization || true
 		fi
 		return 0
 	fi
 	if [[ $has_nvidia_flag -eq 1 ]]; then
-		# If ctranslate2 is not installed, attempt CUDA-enabled wheel (quiet, with fallback)
-		if ! "$VENV_DIR/bin/python" -c 'import ctranslate2' >/dev/null 2>&1; then
+		# If ctranslate2 is not installed, attempt CUDA-enabled wheel (with fallback)
+		if ! "$VENV_DIR/bin/python" "$PY_HELPERS" check-ctranslate2 2>/dev/null; then
 			log "Installing CUDA-enabled CTranslate2 (cu12 wheel)"
-			python -m pip install -q --retries 1 --upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 ||
+			python -m pip install --progress-bar on --retries 1 --upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 ||
 				log "Warning: could not reach cu12 wheel index; will proceed with available ctranslate2"
 		fi
 		# Ensure NVIDIA CUDA 12 runtime libs are available inside the venv
-		python -m pip install -q --retries 1 --upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 ||
+		python -m pip install --progress-bar on --retries 1 --upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 ||
 			log "Warning: failed to install NVIDIA cu12 runtime libs via pip"
 	fi
-	python -m pip install -q --retries 1 --upgrade faster-whisper ffmpeg-python
+	python -m pip install --progress-bar on --retries 1 --upgrade faster-whisper ffmpeg-python

 	# If diarization requested and online, install its Python deps best-effort
 	if [[ ${FW_DIARIZE:-} == "1" ]]; then
-		python -m pip install -q --retries 1 --upgrade soundfile speechbrain ||
+		python -m pip install --progress-bar on --retries 1 --upgrade soundfile speechbrain ||
 			log "Warning: failed to install soundfile/speechbrain"
 		# Torch and torchaudio CPU wheels (force to avoid mismatched CUDA builds)
-		python -m pip install -q --retries 1 --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio ||
+		python -m pip install --progress-bar on --retries 1 --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio ||
 			log "Warning: failed to install torch/torchaudio CPU wheels"
 	fi
-	python - <<'PY'
-import sys
-print(f"[PY] Python {sys.version.split()[0]} dependencies installed.")
-PY
+	python "$PY_HELPERS" deps-installed
 }

 ensure_runner() {
@ -298,7 +290,7 @@ generate_test_audio() {
 	# Fallback: generate tone via Python stdlib (no external deps)
 	if [[ ! -s $tmpwav ]]; then
 		log "Generating 3s 1kHz WAV via Python stdlib -> $tmpwav" >&2
-		python3 -c 'import sys,wave,math,array;outfile=sys.argv[1];fr=16000;dur=3;freq=1000.0;ampl=0.3;n=fr*dur;data=array.array("h",[int(max(-1.0,min(1.0,ampl*math.sin(2*math.pi*freq*(i/fr))))*32767) for i in range(n)]);wf=wave.open(outfile,"w");wf.setnchannels(1);wf.setsampwidth(2);wf.setframerate(fr);wf.writeframes(data.tobytes());wf.close()' "$tmpwav" || true
+		python3 "$PY_HELPERS" generate-wav --file "$tmpwav" || true
 	fi
 	# Final fallback: tone via ffmpeg
 	if [[ ! -s $tmpwav ]]; then
@ -315,15 +307,7 @@ prepare_model() {
 	# shellcheck disable=SC1091
 	source "$VENV_DIR/bin/activate"
 	log "Preparing model '$name' into $MODEL_DIR"
-	python - <<PY
-import sys, os
-from faster_whisper import WhisperModel
-name = os.environ.get('FW_PREPARE_NAME')
-root = os.environ.get('FW_MODEL_DIR')
-print(f"[PY] Preparing model '{name}' into {root}")
-WhisperModel(name, device="cpu", compute_type="int8", download_root=root)
-print("[PY] Model prepared.")
-PY
+	python "$PY_HELPERS" prepare-model --model "$name" --model-dir "$MODEL_DIR"
 }

 main() {
@ -397,8 +381,6 @@ main() {
 			exit 2
 		fi
 		install_python_deps 0
-		export FW_PREPARE_NAME="$PREPARE_MODEL"
-		export FW_MODEL_DIR="$MODEL_DIR"
 		prepare_model "$PREPARE_MODEL"
 		log "Model '$PREPARE_MODEL' downloaded to $MODEL_DIR"
 		exit 0
@ -445,7 +427,7 @@ main() {
 		# Include system and possible venv-provided CUDA libs
 		local pyver venv_cuda_paths=""
 		if [[ -x "$VENV_DIR/bin/python" ]]; then
-			pyver="$("$VENV_DIR"/bin/python -c 'import sys;print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || true)"
+			pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2>/dev/null || true)"
 			if [[ -n $pyver ]]; then
 				venv_cuda_paths="$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib:$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib:$VENV_DIR/lib/python$pyver/site-packages/nvidia/cuda_runtime/lib"
 			fi
@ -454,7 +436,7 @@ main() {
 		export PATH="${PATH}:${CUDA_HOME}/bin"
 		# shellcheck disable=SC1091
 		source "$VENV_DIR/bin/activate"
-		python -c 'from faster_whisper import WhisperModel; WhisperModel("tiny", device="cuda", compute_type="float16"); print("[PY] CUDA test init succeeded.")' || {
+		python "$PY_HELPERS" test-cuda || {
 			echo "CUDA environment check failed. Aborting as requested." >&2
 			exit 6
 		}