testsAndMisc/linux_configuration/scripts/misc/testsAndMisc-bash/tools/transcribe_helpers.py

237 lines
7.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Helper utilities for transcribe.sh - replaces inline Python snippets."""
import argparse
2026-02-20 01:17:53 +01:00
import array
import math
import os
import sys
import wave
def get_python_version() -> str:
    """Report the running interpreter's version as ``"<major>.<minor>"``."""
    major, minor = sys.version_info[:2]
    return f"{major}.{minor}"
def check_faster_whisper() -> bool:
    """Report whether the ``faster_whisper`` package can be imported.

    Returns:
        True when the import succeeds, False when it raises ImportError.
        The function itself never exits; turning False into an exit status
        is left to the caller.
    """
    try:
        import faster_whisper  # noqa: F401
    except ImportError:
        return False
    return True
def check_diarization_deps() -> bool:
    """Probe the optional speaker-diarization dependencies.

    Tries to import ``soundfile``, ``speechbrain`` and ``torch`` in that order.

    Returns:
        True when all three imports succeed. If any import raises (any
        exception, not only ImportError), a ``[WARN]`` line is printed to
        stdout and False is returned.
    """
    try:
        import soundfile  # noqa: F401
        import speechbrain  # noqa: F401
        import torch  # noqa: F401
    except Exception as exc:
        warning = (
            f"[WARN] Diarization deps missing offline ({exc}); speaker labels will be skipped."
        )
        print(warning)
        return False
    return True
def check_ctranslate2() -> bool:
    """Report whether the ``ctranslate2`` package can be imported.

    Returns:
        True when the import succeeds, False when it raises ImportError.
    """
    try:
        import ctranslate2  # noqa: F401
    except ImportError:
        return False
    return True
def print_deps_installed():
    """Write a one-line confirmation, including the interpreter version, to stdout."""
    # sys.version begins with the bare version number; keep only that token.
    interpreter_version = sys.version.split()[0]
    print(f"[PY] Python {interpreter_version} dependencies installed.")
2026-02-20 01:17:53 +01:00
def generate_sine_wav(
    outfile: str,
    frequency: float = 1000.0,
    duration: float = 3,
    sample_rate: int = 16000,
    amplitude: float = 0.3,
) -> bool:
    """Generate a mono 16-bit PCM sine wave WAV file using only Python stdlib.

    Args:
        outfile: Output WAV file path
        frequency: Tone frequency in Hz (default: 1000)
        duration: Duration in seconds; fractional values are accepted and the
            sample count is rounded to the nearest integer (default: 3)
        sample_rate: Sample rate in Hz (default: 16000)
        amplitude: Amplitude 0.0-1.0 (default: 0.3); each scaled sample is
            clamped to [-1.0, 1.0] before conversion to 16-bit

    Returns:
        True on success, False on failure (the error is reported on stderr)
    """
    try:
        # range() needs an int; rounding lets callers pass e.g. duration=0.5
        # instead of failing with a TypeError.
        n_samples = round(sample_rate * duration)

        def sample_at(index: int) -> int:
            # Clamp after scaling so amplitudes above 1.0 clip rather than
            # overflow the signed 16-bit range.
            value = amplitude * math.sin(
                2 * math.pi * frequency * (index / sample_rate)
            )
            return int(max(-1.0, min(1.0, value)) * 32767)

        # Feed the array from a generator to avoid an intermediate list.
        data = array.array("h", (sample_at(i) for i in range(n_samples)))

        with wave.open(outfile, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(sample_rate)
            wf.writeframes(data.tobytes())
        return True
    except Exception as e:
        print(f"[ERROR] Failed to generate WAV: {e}", file=sys.stderr)
        return False
def prepare_model(model_name: str, model_dir: str) -> bool:
    """Fetch a whisper model into a local directory for later offline use.

    The model is obtained by constructing a ``WhisperModel`` on CPU with
    ``int8`` compute and ``download_root`` pointing at ``model_dir``; the
    instance itself is discarded.

    Args:
        model_name: Model name (tiny, base, small, medium, large-v3, etc.)
        model_dir: Directory to store the model

    Returns:
        True on success, False on failure (the error is reported on stderr)
    """
    try:
        from faster_whisper import WhisperModel

        # Best effort: switch HuggingFace Hub progress bars on for the
        # download. A missing huggingface_hub is silently tolerated.
        try:
            from huggingface_hub import logging as hf_logging

            hf_logging.set_verbosity_info()

            import huggingface_hub

            huggingface_hub.constants.HF_HUB_DISABLE_PROGRESS_BARS = False
            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
        except ImportError:
            pass

        print(f"[PY] Preparing model '{model_name}' into {model_dir}")
        # Flush so this notice shows up before any progress output.
        print(
            "[INFO] Downloading model files (progress bar should appear below)...",
            flush=True,
        )

        WhisperModel(
            model_name,
            device="cpu",
            compute_type="int8",
            download_root=model_dir,
        )
    except Exception as err:
        print(f"[ERROR] Failed to prepare model: {err}", file=sys.stderr)
        return False

    print("[PY] Model prepared.")
    return True
def test_cuda() -> bool:
    """Try to initialise a faster-whisper model on a CUDA device.

    Constructs a ``"tiny"`` ``WhisperModel`` with ``device="cuda"`` and
    ``compute_type="float16"``; any exception counts as failure.

    Returns:
        True if CUDA works, False otherwise (the error is reported on stderr)
    """
    try:
        from faster_whisper import WhisperModel

        WhisperModel("tiny", device="cuda", compute_type="float16")
    except Exception as err:
        print(f"[ERROR] CUDA test failed: {err}", file=sys.stderr)
        return False

    print("[PY] CUDA test init succeeded.")
    return True
def main(argv=None) -> None:
    """Parse the command line and run the requested helper command.

    Args:
        argv: Optional list of argument strings to parse. When None
            (the default), argparse reads ``sys.argv[1:]``.

    Exit status (raised via ``SystemExit``):
        7: ``check-faster-whisper`` could not import faster_whisper.
        2: a required option for the command is missing (argparse also uses
           2 for unknown commands or options).
        1: ``check-ctranslate2``, ``generate-wav``, ``prepare-model`` or
           ``test-cuda`` reported failure.
        Otherwise the function returns normally.
    """
    parser = argparse.ArgumentParser(
        description="Helper utilities for transcribe.sh",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Commands:
  python-version        Print Python major.minor version
  check-faster-whisper  Check if faster_whisper is installed (exit 7 if not)
  check-diarization     Check diarization deps (warn if missing)
  check-ctranslate2     Check if ctranslate2 is installed (exit 1 if not)
  deps-installed        Print deps installed confirmation message
  generate-wav          Generate a 3s 1kHz sine wave WAV file (requires --file FILE)
  prepare-model         Download model for offline use (requires --model and --model-dir)
  test-cuda             Test CUDA initialization
""",
    )
    parser.add_argument(
        "command",
        choices=[
            "python-version",
            "check-faster-whisper",
            "check-diarization",
            "check-ctranslate2",
            "deps-installed",
            "generate-wav",
            "prepare-model",
            "test-cuda",
        ],
        help="Command to run",
    )
    parser.add_argument("--file", help="Output file path (for generate-wav)")
    parser.add_argument("--model", help="Model name (for prepare-model)")
    parser.add_argument("--model-dir", help="Model directory (for prepare-model)")
    args = parser.parse_args(argv)

    if args.command == "python-version":
        print(get_python_version())
    elif args.command == "check-faster-whisper":
        if not check_faster_whisper():
            print(
                "Python dependency 'faster_whisper' not found in offline mode. Run with --online to install.",
                file=sys.stderr,
            )
            sys.exit(7)
    elif args.command == "check-diarization":
        # Missing diarization deps only produce a warning; the exit status
        # stays 0 either way.
        check_diarization_deps()
    elif args.command == "check-ctranslate2":
        if not check_ctranslate2():
            sys.exit(1)
    elif args.command == "deps-installed":
        print_deps_installed()
    elif args.command == "generate-wav":
        if not args.file:
            print("--file is required for generate-wav", file=sys.stderr)
            sys.exit(2)
        if not generate_sine_wav(args.file):
            sys.exit(1)
    elif args.command == "prepare-model":
        if not args.model or not args.model_dir:
            print(
                "--model and --model-dir are required for prepare-model",
                file=sys.stderr,
            )
            sys.exit(2)
        if not prepare_model(args.model, args.model_dir):
            sys.exit(1)
    elif args.command == "test-cuda":
        if not test_cuda():
            sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()