mirror of
https://github.com/kuhyx/testsAndMisc-archive.git
synced 2026-07-04 14:43:04 +02:00
Split 18+ Python files that exceeded 500 lines into smaller modules with helper files (prefixed with _). All functions are re-exported from the original modules to maintain backward compatibility with test patches and external imports. Files split: - moviepy_showcase.py (1212 -> 302 + 3 helpers) - anki_generator.py (1174 -> 473 + 4 helpers) - test_analyze_chess_game.py (1152 -> 361 + 2 parts) - poker_modifier_app.py (1024 -> 263 + 2 helpers) - transcribe_fw.py (1007 -> 342 + 3 helpers) - music_generator.py (1002 -> 319 + 2 helpers) - translator.py (951 -> 442 + 2 helpers) - cinema_planner.py (893 -> 369 + 2 helpers) - lichess_bot/main.py (757 -> 495 + _game_logic.py) - test_translator.py (725 -> 289 + part2 + conftest) - test_lichess_api.py (680 -> 475 + part2) - learning_pipe.py (668 -> 375 + 2 helpers) - cache.py (655 -> 360 + _cache_decks.py) - analyze_chess_game.py (632 -> 463 + _move_analysis.py) - visualize_q02.py (609 -> 371 + helper) - repo_explorer.py (602 -> 347 + 2 helpers) - keyboard_coop/main.py (515 -> 416 + _dictionary.py) - scanning.py (501 -> 314 + _enforce_loop.py) All tests pass: 144 lichess_bot (100% branch coverage), 243 others. No new lint errors introduced.
310 lines
9.0 KiB
Python
310 lines
9.0 KiB
Python
"""Cache classes for vocabulary curve excerpts and Anki decks."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import python_pkg.word_frequency.cache as _cache_mod
|
|
|
|
# =============================================================================
|
|
# Vocabulary Curve Cache
|
|
# =============================================================================
|
|
|
|
|
|
class VocabCurveCache:
|
|
"""Cache for vocabulary curve analysis results."""
|
|
|
|
def __init__(self, cache_dir: Path | None = None) -> None:
|
|
"""Initialize vocabulary curve cache.
|
|
|
|
Args:
|
|
cache_dir: Optional custom cache directory.
|
|
"""
|
|
self.cache_dir = (cache_dir or _cache_mod.get_cache_dir()) / "excerpts"
|
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
def _get_cache_path(self, file_hash: str, length: int) -> Path:
|
|
"""Get path to cache file for given hash and length.
|
|
|
|
Args:
|
|
file_hash: Hash of source file.
|
|
length: Excerpt length.
|
|
|
|
Returns:
|
|
Path to cache file.
|
|
"""
|
|
return self.cache_dir / f"{file_hash[:16]}_{length}.json"
|
|
|
|
def get(
|
|
self, filepath: Path, length: int
|
|
) -> tuple[str, list[tuple[str, int]]] | None:
|
|
"""Get cached excerpt and words for a file and length.
|
|
|
|
Args:
|
|
filepath: Path to source file.
|
|
length: Excerpt length.
|
|
|
|
Returns:
|
|
Tuple of (excerpt, words_with_ranks) or None if not cached.
|
|
"""
|
|
file_hash = _cache_mod.get_file_hash(filepath)
|
|
cache_path = self._get_cache_path(file_hash, length)
|
|
|
|
if not cache_path.exists():
|
|
return None
|
|
|
|
try:
|
|
data = json.loads(cache_path.read_text(encoding="utf-8"))
|
|
except (json.JSONDecodeError, KeyError, OSError):
|
|
return None
|
|
else:
|
|
# Verify hash matches
|
|
if data.get("file_hash") != file_hash:
|
|
return None
|
|
excerpt = data["excerpt"]
|
|
words = [(w, r) for w, r in data["words"]]
|
|
return excerpt, words
|
|
|
|
def set(
|
|
self,
|
|
filepath: Path,
|
|
length: int,
|
|
excerpt: str,
|
|
words: list[tuple[str, int]],
|
|
) -> None:
|
|
"""Store excerpt and words in cache.
|
|
|
|
Args:
|
|
filepath: Path to source file.
|
|
length: Excerpt length.
|
|
excerpt: The excerpt text.
|
|
words: List of (word, rank) tuples.
|
|
"""
|
|
file_hash = _cache_mod.get_file_hash(filepath)
|
|
cache_path = self._get_cache_path(file_hash, length)
|
|
|
|
data = {
|
|
"file_hash": file_hash,
|
|
"filepath": str(filepath),
|
|
"length": length,
|
|
"excerpt": excerpt,
|
|
"words": [[w, r] for w, r in words],
|
|
}
|
|
|
|
cache_path.write_text(
|
|
json.dumps(data, ensure_ascii=False, indent=2),
|
|
encoding="utf-8",
|
|
)
|
|
|
|
def clear(self) -> None:
|
|
"""Clear all cached excerpts."""
|
|
for cache_file in self.cache_dir.glob("*.json"):
|
|
cache_file.unlink()
|
|
|
|
def stats(self) -> dict[str, Any]:
|
|
"""Get cache statistics.
|
|
|
|
Returns:
|
|
Dict with cache stats.
|
|
"""
|
|
cache_files = list(self.cache_dir.glob("*.json"))
|
|
total_size = sum(f.stat().st_size for f in cache_files)
|
|
return {
|
|
"total_entries": len(cache_files),
|
|
"cache_dir": str(self.cache_dir),
|
|
"cache_size_bytes": total_size,
|
|
}
|
|
|
|
|
|
# =============================================================================
|
|
# Anki Deck Cache
|
|
# =============================================================================
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class AnkiDeckKey:
|
|
"""Key parameters for Anki deck cache lookups."""
|
|
|
|
filepath: Path
|
|
length: int
|
|
target_lang: str
|
|
include_context: bool
|
|
all_vocab: bool
|
|
|
|
|
|
class AnkiDeckCache:
|
|
"""Cache for generated Anki decks."""
|
|
|
|
def __init__(self, cache_dir: Path | None = None) -> None:
|
|
"""Initialize Anki deck cache.
|
|
|
|
Args:
|
|
cache_dir: Optional custom cache directory.
|
|
"""
|
|
self.cache_dir = (cache_dir or _cache_mod.get_cache_dir()) / "anki_decks"
|
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
self.metadata_file = self.cache_dir / "metadata.json"
|
|
self._metadata: dict[str, Any] | None = None
|
|
|
|
def _load_metadata(self) -> dict[str, Any]:
|
|
"""Load metadata from disk."""
|
|
if self._metadata is None:
|
|
if self.metadata_file.exists():
|
|
try:
|
|
self._metadata = json.loads(
|
|
self.metadata_file.read_text(encoding="utf-8")
|
|
)
|
|
except (json.JSONDecodeError, OSError):
|
|
self._metadata = {}
|
|
else:
|
|
self._metadata = {}
|
|
return self._metadata
|
|
|
|
def _save_metadata(self) -> None:
|
|
"""Save metadata to disk."""
|
|
if self._metadata is not None:
|
|
self.metadata_file.write_text(
|
|
json.dumps(self._metadata, ensure_ascii=False, indent=2),
|
|
encoding="utf-8",
|
|
)
|
|
|
|
@staticmethod
|
|
def _make_key(
|
|
file_hash: str,
|
|
length: int,
|
|
target_lang: str,
|
|
*,
|
|
include_context: bool,
|
|
all_vocab: bool,
|
|
) -> str:
|
|
"""Create cache key for an Anki deck.
|
|
|
|
Args:
|
|
file_hash: Hash of source file.
|
|
length: Excerpt length.
|
|
target_lang: Target language.
|
|
include_context: Whether context is included.
|
|
all_vocab: Whether all vocab is included.
|
|
|
|
Returns:
|
|
Cache key string.
|
|
"""
|
|
flags = f"ctx{int(include_context)}_all{int(all_vocab)}"
|
|
return f"{file_hash[:16]}_{length}_{target_lang}_{flags}"
|
|
|
|
def get(
|
|
self,
|
|
key: AnkiDeckKey,
|
|
) -> tuple[str, str, int, int] | None:
|
|
"""Get cached Anki deck.
|
|
|
|
Args:
|
|
key: Cache key parameters.
|
|
|
|
Returns:
|
|
Tuple of (anki_content, excerpt, num_words, max_rank)
|
|
or None.
|
|
"""
|
|
file_hash = _cache_mod.get_file_hash(key.filepath)
|
|
cache_key = self._make_key(
|
|
file_hash,
|
|
key.length,
|
|
key.target_lang,
|
|
include_context=key.include_context,
|
|
all_vocab=key.all_vocab,
|
|
)
|
|
metadata = self._load_metadata()
|
|
|
|
if cache_key not in metadata:
|
|
return None
|
|
|
|
entry = metadata[cache_key]
|
|
if entry.get("file_hash") != file_hash:
|
|
return None
|
|
|
|
deck_file = self.cache_dir / f"{cache_key}.txt"
|
|
if not deck_file.exists():
|
|
return None
|
|
|
|
try:
|
|
content = deck_file.read_text(encoding="utf-8")
|
|
return (
|
|
content,
|
|
entry["excerpt"],
|
|
entry["num_words"],
|
|
entry["max_rank"],
|
|
)
|
|
except OSError:
|
|
return None
|
|
|
|
def set(
|
|
self,
|
|
key: AnkiDeckKey,
|
|
anki_content: str,
|
|
excerpt: str,
|
|
num_words: int,
|
|
max_rank: int,
|
|
) -> None:
|
|
"""Store Anki deck in cache.
|
|
|
|
Args:
|
|
key: Cache key parameters.
|
|
anki_content: The Anki deck content.
|
|
excerpt: The excerpt text.
|
|
num_words: Number of words in deck.
|
|
max_rank: Maximum word rank.
|
|
"""
|
|
file_hash = _cache_mod.get_file_hash(key.filepath)
|
|
cache_key = self._make_key(
|
|
file_hash,
|
|
key.length,
|
|
key.target_lang,
|
|
include_context=key.include_context,
|
|
all_vocab=key.all_vocab,
|
|
)
|
|
|
|
# Save deck content
|
|
deck_file = self.cache_dir / f"{cache_key}.txt"
|
|
deck_file.write_text(anki_content, encoding="utf-8")
|
|
|
|
# Update metadata
|
|
metadata = self._load_metadata()
|
|
metadata[cache_key] = {
|
|
"file_hash": file_hash,
|
|
"filepath": str(key.filepath),
|
|
"length": key.length,
|
|
"target_lang": key.target_lang,
|
|
"include_context": key.include_context,
|
|
"all_vocab": key.all_vocab,
|
|
"excerpt": excerpt,
|
|
"num_words": num_words,
|
|
"max_rank": max_rank,
|
|
}
|
|
self._save_metadata()
|
|
|
|
def clear(self) -> None:
|
|
"""Clear all cached decks."""
|
|
self._metadata = {}
|
|
for cache_file in self.cache_dir.glob("*.txt"):
|
|
cache_file.unlink()
|
|
if self.metadata_file.exists():
|
|
self.metadata_file.unlink()
|
|
|
|
def stats(self) -> dict[str, Any]:
|
|
"""Get cache statistics.
|
|
|
|
Returns:
|
|
Dict with cache stats.
|
|
"""
|
|
metadata = self._load_metadata()
|
|
cache_files = list(self.cache_dir.glob("*.txt"))
|
|
total_size = sum(f.stat().st_size for f in cache_files)
|
|
return {
|
|
"total_entries": len(metadata),
|
|
"cache_dir": str(self.cache_dir),
|
|
"cache_size_bytes": total_size,
|
|
}
|