mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 16:23:04 +02:00
Split 18+ Python files that exceeded 500 lines into smaller modules with helper files (prefixed with _). All functions are re-exported from the original modules to maintain backward compatibility with test patches and external imports. Files split: - moviepy_showcase.py (1212 -> 302 + 3 helpers) - anki_generator.py (1174 -> 473 + 4 helpers) - test_analyze_chess_game.py (1152 -> 361 + 2 parts) - poker_modifier_app.py (1024 -> 263 + 2 helpers) - transcribe_fw.py (1007 -> 342 + 3 helpers) - music_generator.py (1002 -> 319 + 2 helpers) - translator.py (951 -> 442 + 2 helpers) - cinema_planner.py (893 -> 369 + 2 helpers) - lichess_bot/main.py (757 -> 495 + _game_logic.py) - test_translator.py (725 -> 289 + part2 + conftest) - test_lichess_api.py (680 -> 475 + part2) - learning_pipe.py (668 -> 375 + 2 helpers) - cache.py (655 -> 360 + _cache_decks.py) - analyze_chess_game.py (632 -> 463 + _move_analysis.py) - visualize_q02.py (609 -> 371 + helper) - repo_explorer.py (602 -> 347 + 2 helpers) - keyboard_coop/main.py (515 -> 416 + _dictionary.py) - scanning.py (501 -> 314 + _enforce_loop.py) All tests pass: 144 lichess_bot (100% branch coverage), 243 others. No new lint errors introduced.
128 lines
3.5 KiB
Python
128 lines
3.5 KiB
Python
"""Output writers for transcription results (SRT, TXT, RTTM)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import timedelta
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
def format_timestamp(seconds: float) -> str:
    """Format seconds as SRT timestamp HH:MM:SS,mmm.

    The input is rounded to the nearest millisecond exactly once and every
    field is derived from that single integer, so the seconds and
    milliseconds parts can never disagree (e.g. ``1.9999996`` becomes
    ``00:00:02,000`` rather than ``00:00:02,999``). Negative inputs are
    clamped to zero.

    Args:
        seconds: Offset in seconds.

    Returns:
        The timestamp string; the hours field is zero-padded to two digits
        and grows wider when needed.
    """
    # Work in integer milliseconds to avoid float artefacts such as
    # (1.001 - 1) * 1000 == 0.9999999999998899 truncating to 0.
    total_millis = max(0, round(float(seconds) * 1000))
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
|
|
|
|
|
|
def write_srt(
    segments: list[Any], srt_path: str
) -> None:
    """Write segments to an SRT subtitle file.

    Segments whose text is empty, missing, or whitespace-only are skipped.
    Cue numbers are assigned only to segments that are actually written, so
    the numbering in the file is always consecutive starting at 1.

    Args:
        segments: Objects exposing ``start``, ``end`` and ``text``
            attributes.
        srt_path: Destination file path; the file is overwritten and
            encoded as UTF-8.
    """
    with Path(srt_path).open(
        "w", encoding="utf-8"
    ) as f:
        cue_number = 0
        for seg in segments:
            text = (seg.text or "").strip()
            if not text:
                continue
            # Count after the blank check so skipped segments do not leave
            # gaps in the cue sequence.
            cue_number += 1
            start = format_timestamp(seg.start)
            end = format_timestamp(seg.end)
            f.write(
                f"{cue_number}\n{start} --> {end}\n{text}\n\n"
            )
|
|
|
|
|
|
def write_txt(
    segments: list[Any], txt_path: str
) -> None:
    """Dump the transcript to a plain-text file, one segment per line.

    Each segment's ``text`` is stripped of surrounding whitespace; segments
    that end up empty (including ``None`` text) are left out. The target
    file is overwritten and encoded as UTF-8.
    """
    with Path(txt_path).open("w", encoding="utf-8") as out:
        stripped = (
            (segment.text or "").strip() for segment in segments
        )
        out.writelines(f"{line}\n" for line in stripped if line)
|
|
|
|
|
|
def write_srt_with_speakers(
    segments: list[Any],
    labels: list[int],
    path: str,
) -> None:
    """Write SRT subtitles with speaker labels.

    Each written cue's text is prefixed with ``[SPKn]`` where ``n`` is the
    zero-based label plus one. Segments whose text is empty, missing, or
    whitespace-only are skipped, and cue numbers are assigned only to
    segments that are written, so the numbering stays consecutive.

    Args:
        segments: Objects exposing ``start``, ``end`` and ``text``
            attributes.
        labels: Zero-based speaker index for each segment, paired with
            ``segments`` by position. If the lengths differ, the extra
            items of the longer list are ignored.
        path: Destination file path; the file is overwritten and encoded
            as UTF-8.
    """
    with Path(path).open("w", encoding="utf-8") as f:
        cue_number = 0
        for seg, lab in zip(segments, labels, strict=False):
            text = (seg.text or "").strip()
            if not text:
                continue
            # Count after the blank check so skipped segments do not leave
            # gaps in the cue sequence.
            cue_number += 1
            spk = f"SPK{lab + 1}"
            start_ts = format_timestamp(seg.start)
            end_ts = format_timestamp(seg.end)
            f.write(
                f"{cue_number}\n{start_ts} --> {end_ts}\n"
                f"[{spk}] {text}\n\n"
            )
|
|
|
|
|
|
def write_txt_with_speakers(
|
|
segments: list[Any],
|
|
labels: list[int],
|
|
path: str,
|
|
) -> None:
|
|
"""Write plain text with speaker labels."""
|
|
with Path(path).open("w", encoding="utf-8") as f:
|
|
for seg, lab in zip(
|
|
segments, labels, strict=False
|
|
):
|
|
text = (seg.text or "").strip()
|
|
if text:
|
|
spk = f"SPK{lab + 1}"
|
|
f.write(f"[{spk}] {text}\n")
|
|
|
|
|
|
def write_rttm(
|
|
segments: list[Any],
|
|
labels: list[int],
|
|
path: str,
|
|
file_id: str = "audio",
|
|
) -> None:
|
|
"""Write RTTM speaker diarization output."""
|
|
with Path(path).open("w", encoding="utf-8") as f:
|
|
for seg, lab in zip(
|
|
segments, labels, strict=False
|
|
):
|
|
start = float(
|
|
getattr(seg, "start", 0.0) or 0.0
|
|
)
|
|
end = float(
|
|
getattr(seg, "end", start) or start
|
|
)
|
|
dur = max(0.0, end - start)
|
|
name = f"SPK{lab + 1}"
|
|
f.write(
|
|
f"SPEAKER {file_id} 1 "
|
|
f"{start:.3f} {dur:.3f} "
|
|
f"<NA> <NA> {name} <NA>\n"
|
|
)
|
|
|
|
|
|
def hhmmss(seconds: float) -> str:
    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Negative durations are treated as zero and the fractional part is
    dropped (truncated, not rounded). The hours field grows beyond two
    digits when needed.
    """
    whole = int(max(0.0, float(seconds)))
    total_minutes, s = divmod(whole, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d}"
|