feat: more descriptive transcription installation

This commit is contained in:
Krzysztof Rudnicki 2025-12-20 21:49:52 +01:00
parent 6a26422737
commit 16c2208ac8
5 changed files with 340 additions and 35 deletions

View File

@ -7,6 +7,121 @@ import sys
import time
from datetime import timedelta
from typing import List, Optional
def format_bytes(size: int) -> str:
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then formatted with one decimal place followed
    directly by the unit, e.g. ``1.5KB``.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = size
    index = 0
    # Stop at the last unit so very large values are still reported in TB.
    while value >= 1024 and index < len(units) - 1:
        value /= 1024
        index += 1
    return f"{value:.1f}{units[index]}"
def download_model_with_progress(model_name: str) -> str:
    """Download model files from HuggingFace, printing per-file progress.

    Args:
        model_name: A short model alias (e.g. ``"tiny"``, ``"large-v3"``) or
            a full HuggingFace repo ID containing ``/``.

    Returns:
        The local directory that holds the model files. When
        ``huggingface_hub`` is not installed, or the download cannot be
        completed, ``model_name`` is returned unchanged so the caller can
        fall back to the default loading path.
    """
    try:
        from huggingface_hub import hf_hub_download, try_to_load_from_cache
        from huggingface_hub.utils import EntryNotFoundError
    except ImportError:
        print("[WARN] huggingface_hub not available, falling back to default download", file=sys.stderr)
        return model_name

    # Map common model names to HF repo IDs
    model_map = {
        "tiny": "Systran/faster-whisper-tiny",
        "tiny.en": "Systran/faster-whisper-tiny.en",
        "base": "Systran/faster-whisper-base",
        "base.en": "Systran/faster-whisper-base.en",
        "small": "Systran/faster-whisper-small",
        "small.en": "Systran/faster-whisper-small.en",
        "medium": "Systran/faster-whisper-medium",
        "medium.en": "Systran/faster-whisper-medium.en",
        "large-v1": "Systran/faster-whisper-large-v1",
        "large-v2": "Systran/faster-whisper-large-v2",
        "large-v3": "Systran/faster-whisper-large-v3",
        "large": "Systran/faster-whisper-large-v3",
        "distil-large-v2": "Systran/faster-distil-whisper-large-v2",
        "distil-large-v3": "Systran/faster-distil-whisper-large-v3",
        "distil-medium.en": "Systran/faster-distil-whisper-medium.en",
        "distil-small.en": "Systran/faster-distil-whisper-small.en",
    }

    repo_id = model_map.get(model_name)
    if repo_id is None:
        # Anything containing "/" is taken as a full repo ID; a bare unknown
        # name is assumed to be a Systran conversion.
        repo_id = model_name if "/" in model_name else f"Systran/faster-whisper-{model_name}"

    print(f"[INFO] Checking model: {repo_id}", flush=True)

    # model.bin is the only file we cannot do without; the rest vary by repo.
    # NOTE(review): large-v3 style conversions appear to ship vocabulary.json
    # and preprocessor_config.json rather than vocabulary.txt (faster-whisper's
    # own downloader also fetches them), so both variants are requested here.
    required_files = {"model.bin"}
    model_files = [
        "config.json",
        "model.bin",
        "tokenizer.json",
        "preprocessor_config.json",
        "vocabulary.txt",
        "vocabulary.json",
    ]

    try:
        # try_to_load_from_cache may return None (not cached) or a non-path
        # sentinel (cached as "does not exist"); only a str is a usable path.
        cache_path = try_to_load_from_cache(repo_id, "model.bin")
        if isinstance(cache_path, str):
            cached_dir = os.path.dirname(cache_path)
            print(f"[INFO] Model already cached, loading from: {cached_dir}", flush=True)
            return cached_dir

        # Model not cached, need to download
        print(f"[INFO] Downloading model files from {repo_id}...", flush=True)
        print("[INFO] This may take several minutes for large models (~3GB for large-v3)", flush=True)

        # Total size is purely informational, so any failure here is ignored.
        try:
            from huggingface_hub import HfFileSystem

            files_info = HfFileSystem().ls(repo_id, detail=True)
            total_size = sum(
                info.get("size") or 0
                for info in files_info
                if info.get("name", "").split("/")[-1] in model_files
            )
            print(f"[INFO] Total download size: ~{format_bytes(total_size)}", flush=True)
        except Exception:
            pass  # Size info is optional

        model_dir = None
        start_time = time.time()
        for filename in model_files:
            file_start = time.time()
            print(f"[DOWNLOAD] {filename}...", end=" ", flush=True)
            try:
                # Interrupted downloads are resumed by huggingface_hub itself;
                # the deprecated resume_download flag is intentionally omitted.
                local_path = hf_hub_download(repo_id=repo_id, filename=filename)
            except EntryNotFoundError:
                if filename in required_files:
                    print("not found", flush=True)
                    raise
                print("not found (optional)", flush=True)
                continue
            except Exception as e:
                # A partially downloaded directory would fail at load time,
                # so abort and let the outer handler fall back.
                print(f"error: {e}", flush=True)
                raise

            elapsed = time.time() - file_start
            file_size = os.path.getsize(local_path) if os.path.exists(local_path) else 0
            print(f"done ({format_bytes(file_size)}, {elapsed:.1f}s)", flush=True)
            if model_dir is None:
                # All files of one repo revision land in the same directory.
                model_dir = os.path.dirname(local_path)

        if model_dir is None:
            raise RuntimeError("no model files were downloaded")

        total_time = time.time() - start_time
        print(f"[INFO] Download complete in {total_time:.1f}s", flush=True)
        return model_dir
    except Exception as e:
        print(f"[WARN] Custom download failed ({e}), falling back to default", file=sys.stderr)
        return model_name
def format_timestamp(seconds: float) -> str:
td = timedelta(seconds=seconds)
# Ensure SRT format HH:MM:SS,mmm
@ -324,7 +439,21 @@ def main():
compute_type = "float16" if device == "cuda" else "float32"
print(f"[INFO] Loading model='{args.model}', device='{device}', compute_type='{compute_type}'")
model = WhisperModel(args.model, device=device, compute_type=compute_type)
# Pre-download model files with explicit progress if not already cached
model_path = args.model
if not os.path.isdir(args.model): # Not a local path, need to download from HF
model_path = download_model_with_progress(args.model)
# Show CTranslate2 conversion progress
import logging
logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
ct2_logger = logging.getLogger("faster_whisper")
ct2_logger.setLevel(logging.INFO)
print("[INFO] Initializing model...", flush=True)
model = WhisperModel(model_path, device=device, compute_type=compute_type)
print("[INFO] Model loaded successfully.", flush=True)
# Transcription with live progress
total_duration = get_media_duration(inp)

View File

@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""Helper utilities for transcribe.sh - replaces inline Python snippets."""
import argparse
import math
import os
import sys
import array
import wave
def get_python_version() -> str:
    """Return the running interpreter's version as ``"<major>.<minor>"``."""
    major, minor = sys.version_info[:2]
    return f"{major}.{minor}"
def check_faster_whisper() -> bool:
    """Report whether the ``faster_whisper`` package can be imported.

    Returns:
        True when the import succeeds, False when it raises ImportError.
        This function only reports the result; it never exits the process.
    """
    try:
        import faster_whisper  # noqa: F401
    except ImportError:
        return False
    else:
        return True
def check_diarization_deps() -> bool:
    """Probe the optional diarization stack (soundfile, speechbrain, torch).

    Returns:
        True when all three packages import cleanly. On any exception a
        ``[WARN]`` line is printed to stdout and False is returned.
    """
    try:
        import soundfile  # noqa: F401
        import speechbrain  # noqa: F401
        import torch  # noqa: F401
    except Exception as exc:
        print(f"[WARN] Diarization deps missing offline ({exc}); speaker labels will be skipped.")
        return False
    return True
def check_ctranslate2() -> bool:
    """Report whether the ``ctranslate2`` package can be imported.

    Returns:
        True when the import succeeds, False when it raises ImportError.
    """
    try:
        import ctranslate2  # noqa: F401
    except ImportError:
        return False
    else:
        return True
def print_deps_installed():
    """Print a one-line confirmation naming the running Python version."""
    # sys.version starts with the dotted version number, followed by build info.
    interpreter_version = sys.version.split()[0]
    print(f"[PY] Python {interpreter_version} dependencies installed.")
def generate_sine_wav(outfile: str, frequency: float = 1000.0, duration: int = 3,
                      sample_rate: int = 16000, amplitude: float = 0.3) -> bool:
    """Write a mono 16-bit sine tone to a WAV file using only the stdlib.

    Args:
        outfile: Output WAV file path
        frequency: Tone frequency in Hz (default: 1000)
        duration: Duration in seconds (default: 3)
        sample_rate: Sample rate in Hz (default: 16000)
        amplitude: Amplitude 0.0-1.0 (default: 0.3)

    Returns:
        True on success, False on failure (the error is printed to stderr)
    """
    try:
        total_samples = sample_rate * duration
        angular = 2 * math.pi * frequency
        samples = array.array("h")
        for index in range(total_samples):
            level = amplitude * math.sin(angular * (index / sample_rate))
            # Clamp to [-1, 1] so the scaled value always fits a signed 16-bit sample.
            level = max(-1.0, min(1.0, level))
            samples.append(int(level * 32767))

        with wave.open(outfile, "w") as wav_out:
            wav_out.setnchannels(1)   # mono
            wav_out.setsampwidth(2)   # 2 bytes per sample
            wav_out.setframerate(sample_rate)
            wav_out.writeframes(samples.tobytes())
    except Exception as exc:
        print(f"[ERROR] Failed to generate WAV: {exc}", file=sys.stderr)
        return False
    return True
def prepare_model(model_name: str, model_dir: str) -> bool:
    """Fetch a whisper model into ``model_dir`` so it can be used offline.

    The model is obtained by constructing a CPU/int8 ``WhisperModel`` with
    ``download_root=model_dir``.

    Args:
        model_name: Model name (tiny, base, small, medium, large-v3, etc.)
        model_dir: Directory to store the model

    Returns:
        True on success, False on failure (the error is printed to stderr)
    """
    prepared = False
    try:
        from faster_whisper import WhisperModel

        # Ask HuggingFace Hub for verbose logging and progress bars; this is
        # skipped silently when huggingface_hub cannot be imported.
        try:
            from huggingface_hub import logging as hub_logging
            hub_logging.set_verbosity_info()
            import huggingface_hub as hub
            hub.constants.HF_HUB_DISABLE_PROGRESS_BARS = False
            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
        except ImportError:
            pass

        print(f"[PY] Preparing model '{model_name}' into {model_dir}")
        print("[INFO] Downloading model files (progress bar should appear below)...", flush=True)
        # The instance itself is not needed; construction performs the download.
        WhisperModel(model_name, device="cpu", compute_type="int8", download_root=model_dir)
        print("[PY] Model prepared.")
        prepared = True
    except Exception as exc:
        print(f"[ERROR] Failed to prepare model: {exc}", file=sys.stderr)
    return prepared
def test_cuda() -> bool:
    """Try to construct a float16 "tiny" ``WhisperModel`` on the CUDA device.

    Returns:
        True if construction succeeds, False otherwise (the error is
        printed to stderr)
    """
    succeeded = False
    try:
        from faster_whisper import WhisperModel

        # Only initialization is exercised; nothing is transcribed.
        WhisperModel("tiny", device="cuda", compute_type="float16")
        print("[PY] CUDA test init succeeded.")
        succeeded = True
    except Exception as exc:
        print(f"[ERROR] CUDA test failed: {exc}", file=sys.stderr)
    return succeeded
def main():
    """Parse the command line and run the requested helper command.

    Exit codes: 7 when ``check-faster-whisper`` fails, 2 when a command is
    missing a required option, 1 when a command reports failure. Commands
    that succeed (and ``check-diarization``, which only warns) return
    normally.
    """
    commands = [
        "python-version",
        "check-faster-whisper",
        "check-diarization",
        "check-ctranslate2",
        "deps-installed",
        "generate-wav",
        "prepare-model",
        "test-cuda",
    ]
    parser = argparse.ArgumentParser(
        description="Helper utilities for transcribe.sh",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Commands:
python-version Print Python major.minor version
check-faster-whisper Check if faster_whisper is installed (exit 7 if not)
check-diarization Check diarization deps (warn if missing)
check-ctranslate2 Check if ctranslate2 is installed (exit 1 if not)
deps-installed Print deps installed confirmation message
generate-wav FILE Generate a 3s 1kHz sine wave WAV file
prepare-model Download model for offline use (requires --model and --model-dir)
test-cuda Test CUDA initialization
""")
    parser.add_argument("command", choices=commands, help="Command to run")
    parser.add_argument("--file", help="Output file path (for generate-wav)")
    parser.add_argument("--model", help="Model name (for prepare-model)")
    parser.add_argument("--model-dir", help="Model directory (for prepare-model)")
    args = parser.parse_args()
    command = args.command

    if command == "python-version":
        print(get_python_version())
        return

    if command == "check-faster-whisper":
        if check_faster_whisper():
            return
        print("Python dependency 'faster_whisper' not found in offline mode. Run with --online to install.", file=sys.stderr)
        sys.exit(7)

    if command == "check-diarization":
        # Warn-only: the result is deliberately ignored.
        check_diarization_deps()
        return

    if command == "check-ctranslate2":
        if not check_ctranslate2():
            sys.exit(1)
        return

    if command == "deps-installed":
        print_deps_installed()
        return

    if command == "generate-wav":
        if not args.file:
            print("--file is required for generate-wav", file=sys.stderr)
            sys.exit(2)
        if not generate_sine_wav(args.file):
            sys.exit(1)
        return

    if command == "prepare-model":
        if not (args.model and args.model_dir):
            print("--model and --model-dir are required for prepare-model", file=sys.stderr)
            sys.exit(2)
        if not prepare_model(args.model, args.model_dir):
            sys.exit(1)
        return

    if command == "test-cuda":
        if not test_cuda():
            sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -11,6 +11,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$SCRIPT_DIR"
TOOLS_DIR="$PROJECT_DIR/tools"
PY_RUNNER="$TOOLS_DIR/transcribe_fw.py"
PY_HELPERS="$TOOLS_DIR/transcribe_helpers.py"
VENV_DIR="$PROJECT_DIR/.venv"
usage() {
@ -73,7 +74,7 @@ has_libcublas12() {
# venv-provided NVIDIA CUDA libs
if [[ -x "$VENV_DIR/bin/python" ]]; then
local pyver
pyver="$("$VENV_DIR"/bin/python -c 'import sys;print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || true)"
pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2>/dev/null || true)"
if [[ -n $pyver ]]; then
for d in "$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib" \
"$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib" \
@ -234,46 +235,37 @@ install_python_deps() {
export PIP_DEFAULT_TIMEOUT=${PIP_DEFAULT_TIMEOUT:-20}
if [[ $OFFLINE -eq 1 ]]; then
# Offline: do not install, just verify modules
if ! python -c 'import faster_whisper' >/dev/null 2>&1; then
echo "Python dependency 'faster_whisper' not found in offline mode. Run with --online to install." >&2
if ! python "$PY_HELPERS" check-faster-whisper; then
exit 7
fi
# If diarization requested offline, check for its deps too (warn-only)
if [[ ${FW_DIARIZE:-} == "1" ]]; then
python - <<'PY' || true
try:
import soundfile, speechbrain, torch # noqa: F401
except Exception as e:
print(f"[WARN] Diarization deps missing offline ({e}); speaker labels will be skipped.")
PY
python "$PY_HELPERS" check-diarization || true
fi
return 0
fi
if [[ $has_nvidia_flag -eq 1 ]]; then
# If ctranslate2 is not installed, attempt CUDA-enabled wheel (quiet, with fallback)
if ! "$VENV_DIR/bin/python" -c 'import ctranslate2' >/dev/null 2>&1; then
# If ctranslate2 is not installed, attempt CUDA-enabled wheel (with fallback)
if ! "$VENV_DIR/bin/python" "$PY_HELPERS" check-ctranslate2 2>/dev/null; then
log "Installing CUDA-enabled CTranslate2 (cu12 wheel)"
python -m pip install -q --retries 1 --upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 ||
python -m pip install --progress-bar on --retries 1 --upgrade "ctranslate2<5,>=4.0" --extra-index-url https://download.opennmt.net/ctranslate2/cu12 ||
log "Warning: could not reach cu12 wheel index; will proceed with available ctranslate2"
fi
# Ensure NVIDIA CUDA 12 runtime libs are available inside the venv
python -m pip install -q --retries 1 --upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 ||
python -m pip install --progress-bar on --retries 1 --upgrade nvidia-cublas-cu12 nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 ||
log "Warning: failed to install NVIDIA cu12 runtime libs via pip"
fi
python -m pip install -q --retries 1 --upgrade faster-whisper ffmpeg-python
python -m pip install --progress-bar on --retries 1 --upgrade faster-whisper ffmpeg-python
# If diarization requested and online, install its Python deps best-effort
if [[ ${FW_DIARIZE:-} == "1" ]]; then
python -m pip install -q --retries 1 --upgrade soundfile speechbrain ||
python -m pip install --progress-bar on --retries 1 --upgrade soundfile speechbrain ||
log "Warning: failed to install soundfile/speechbrain"
# Torch and torchaudio CPU wheels (force to avoid mismatched CUDA builds)
python -m pip install -q --retries 1 --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio ||
python -m pip install --progress-bar on --retries 1 --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cpu torch torchaudio ||
log "Warning: failed to install torch/torchaudio CPU wheels"
fi
python - <<'PY'
import sys
print(f"[PY] Python {sys.version.split()[0]} dependencies installed.")
PY
python "$PY_HELPERS" deps-installed
}
ensure_runner() {
@ -298,7 +290,7 @@ generate_test_audio() {
# Fallback: generate tone via Python stdlib (no external deps)
if [[ ! -s $tmpwav ]]; then
log "Generating 3s 1kHz WAV via Python stdlib -> $tmpwav" >&2
python3 -c 'import sys,wave,math,array;outfile=sys.argv[1];fr=16000;dur=3;freq=1000.0;ampl=0.3;n=fr*dur;data=array.array("h",[int(max(-1.0,min(1.0,ampl*math.sin(2*math.pi*freq*(i/fr))))*32767) for i in range(n)]);wf=wave.open(outfile,"w");wf.setnchannels(1);wf.setsampwidth(2);wf.setframerate(fr);wf.writeframes(data.tobytes());wf.close()' "$tmpwav" || true
python3 "$PY_HELPERS" generate-wav --file "$tmpwav" || true
fi
# Final fallback: tone via ffmpeg
if [[ ! -s $tmpwav ]]; then
@ -315,15 +307,7 @@ prepare_model() {
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"
log "Preparing model '$name' into $MODEL_DIR"
python - <<PY
import sys, os
from faster_whisper import WhisperModel
name = os.environ.get('FW_PREPARE_NAME')
root = os.environ.get('FW_MODEL_DIR')
print(f"[PY] Preparing model '{name}' into {root}")
WhisperModel(name, device="cpu", compute_type="int8", download_root=root)
print("[PY] Model prepared.")
PY
python "$PY_HELPERS" prepare-model --model "$name" --model-dir "$MODEL_DIR"
}
main() {
@ -397,8 +381,6 @@ main() {
exit 2
fi
install_python_deps 0
export FW_PREPARE_NAME="$PREPARE_MODEL"
export FW_MODEL_DIR="$MODEL_DIR"
prepare_model "$PREPARE_MODEL"
log "Model '$PREPARE_MODEL' downloaded to $MODEL_DIR"
exit 0
@ -445,7 +427,7 @@ main() {
# Include system and possible venv-provided CUDA libs
local pyver venv_cuda_paths=""
if [[ -x "$VENV_DIR/bin/python" ]]; then
pyver="$("$VENV_DIR"/bin/python -c 'import sys;print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || true)"
pyver="$("$VENV_DIR"/bin/python "$PY_HELPERS" python-version 2>/dev/null || true)"
if [[ -n $pyver ]]; then
venv_cuda_paths="$VENV_DIR/lib/python$pyver/site-packages/nvidia/cublas/lib:$VENV_DIR/lib/python$pyver/site-packages/nvidia/cudnn/lib:$VENV_DIR/lib/python$pyver/site-packages/nvidia/cuda_runtime/lib"
fi
@ -454,7 +436,7 @@ main() {
export PATH="${PATH}:${CUDA_HOME}/bin"
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"
python -c 'from faster_whisper import WhisperModel; WhisperModel("tiny", device="cuda", compute_type="float16"); print("[PY] CUDA test init succeeded.")' || {
python "$PY_HELPERS" test-cuda || {
echo "CUDA environment check failed. Aborting as requested." >&2
exit 6
}