refactor(word_frequency): fix all ruff violations and remove noqa comments

- Replace print() with logging module throughout
- Add type annotations and Google docstrings to all functions
- Introduce DeckInput and LessonConfig dataclasses to reduce function parameters
- Use specific exception types instead of bare except (BLE001)
- Remove all noqa suppression comments
- Fix test fixtures: remove unused _capsys/_tmp_path parameters
2026-07-04 13:23:01 +02:00 · 2026-03-13 20:41:31 +01:00 · 2026-03-13 20:41:31 +01:00 · 2bb930db6f
commit 2bb930db6f
parent ac1228f9c4
14 changed files with 2537 additions and 1415 deletions
--- a/python_pkg/word_frequency/analyzer.py
+++ b/python_pkg/word_frequency/analyzer.py
@ -22,11 +22,14 @@ from __future__ import annotations
 import argparse
 from collections import Counter
 import logging
 from pathlib import Path
 import re
 import sys
 from typing import TYPE_CHECKING
 logger = logging.getLogger(__name__)
 if TYPE_CHECKING:
    from collections.abc import Sequence
@ -90,9 +93,7 @@ def read_files(filepaths: Sequence[str | Path]) -> str:
    Returns:
        Combined text content of all files.
    """
-    texts = []
+    texts = [read_file(filepath) for filepath in filepaths]
    for filepath in filepaths:
        texts.append(read_file(filepath))
    return "\n".join(texts)
@ -244,15 +245,15 @@ def main(argv: Sequence[str] | None = None) -> int:
        if args.output:
            Path(args.output).write_text(result, encoding="utf-8")
-            print(f"Output written to {args.output}")
+            logger.info("Output written to %s", args.output)
        else:
-            print(result)
+            sys.stdout.write(result + "\n")
-    except FileNotFoundError as e:
+    except FileNotFoundError:
-        print(f"Error: File not found - {e}", file=sys.stderr)
+        logger.exception("File not found")
        return 1
-    except UnicodeDecodeError as e:
+    except UnicodeDecodeError:
-        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)
+        logger.exception("Could not decode file as UTF-8")
        return 1
    return 0
--- a/python_pkg/word_frequency/anki_generator.py
+++ b/python_pkg/word_frequency/anki_generator.py
--- a/python_pkg/word_frequency/cache.py
+++ b/python_pkg/word_frequency/cache.py
@ -11,15 +11,23 @@ Cache location: ~/.cache/word_frequency/
 from __future__ import annotations
 import argparse
 from dataclasses import dataclass
 import hashlib
 import json
 import logging
 import os
 from pathlib import Path
 from typing import Any
 logger = logging.getLogger(__name__)
 # Default cache directory
 DEFAULT_CACHE_DIR = Path.home() / ".cache" / "word_frequency"
 _ONE_KB = 1024
 _ONE_MB = 1024 * 1024
 def get_cache_dir() -> Path:
    """Get the cache directory, creating it if needed.
@ -42,7 +50,7 @@ def get_file_hash(filepath: Path) -> str:
        Hex digest of file hash.
    """
    hasher = hashlib.sha256()
-    with open(filepath, "rb") as f:
+    with filepath.open("rb") as f:
        # Read in chunks for large files
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
@ -274,14 +282,15 @@ class VocabCurveCache:
        try:
            data = json.loads(cache_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, KeyError, OSError):
            return None
        else:
            # Verify hash matches
            if data.get("file_hash") != file_hash:
                return None
            excerpt = data["excerpt"]
            words = [(w, r) for w, r in data["words"]]
            return excerpt, words
        except (json.JSONDecodeError, KeyError, OSError):
            return None
    def set(
        self,
@ -339,6 +348,17 @@ class VocabCurveCache:
 # =============================================================================
@dataclass(frozen=True)
 class AnkiDeckKey:
    """Key parameters for Anki deck cache lookups.

    Attributes:
        filepath: Path to the source file.
        length: Excerpt length.
        target_lang: Target language.
        include_context: Whether context is included.
        all_vocab: Whether all vocab is included.
    """
    filepath: Path
    length: int
    target_lang: str
    include_context: bool
    all_vocab: bool
 class AnkiDeckCache:
    """Cache for generated Anki decks."""
@ -380,6 +400,7 @@ class AnkiDeckCache:
        file_hash: str,
        length: int,
        target_lang: str,
        *,
        include_context: bool,
        all_vocab: bool,
    ) -> str:
@ -400,36 +421,35 @@ class AnkiDeckCache:
    def get(
        self,
-        filepath: Path,
+        key: AnkiDeckKey,
        length: int,
        target_lang: str,
        include_context: bool,
        all_vocab: bool,
    ) -> tuple[str, str, int, int] | None:
        """Get cached Anki deck.
        Args:
-            filepath: Path to source file.
+            key: Cache key parameters.
            length: Excerpt length.
            target_lang: Target language.
            include_context: Whether context is included.
            all_vocab: Whether all vocab is included.
        Returns:
-            Tuple of (anki_content, excerpt, num_words, max_rank) or None.
+            Tuple of (anki_content, excerpt, num_words, max_rank)
            or None.
        """
-        file_hash = get_file_hash(filepath)
+        file_hash = get_file_hash(key.filepath)
-        key = self._make_key(file_hash, length, target_lang, include_context, all_vocab)
+        cache_key = self._make_key(
            file_hash,
            key.length,
            key.target_lang,
            include_context=key.include_context,
            all_vocab=key.all_vocab,
        )
        metadata = self._load_metadata()
-        if key not in metadata:
+        if cache_key not in metadata:
            return None
-        entry = metadata[key]
+        entry = metadata[cache_key]
        if entry.get("file_hash") != file_hash:
            return None
-        deck_file = self.cache_dir / f"{key}.txt"
+        deck_file = self.cache_dir / f"{cache_key}.txt"
        if not deck_file.exists():
            return None
@ -446,11 +466,7 @@ class AnkiDeckCache:
    def set(
        self,
-        filepath: Path,
+        key: AnkiDeckKey,
        length: int,
        target_lang: str,
        include_context: bool,
        all_vocab: bool,
        anki_content: str,
        excerpt: str,
        num_words: int,
@ -459,32 +475,34 @@ class AnkiDeckCache:
        """Store Anki deck in cache.
        Args:
-            filepath: Path to source file.
+            key: Cache key parameters.
            length: Excerpt length.
            target_lang: Target language.
            include_context: Whether context is included.
            all_vocab: Whether all vocab is included.
            anki_content: The Anki deck content.
            excerpt: The excerpt text.
            num_words: Number of words in deck.
            max_rank: Maximum word rank.
        """
-        file_hash = get_file_hash(filepath)
+        file_hash = get_file_hash(key.filepath)
-        key = self._make_key(file_hash, length, target_lang, include_context, all_vocab)
+        cache_key = self._make_key(
            file_hash,
            key.length,
            key.target_lang,
            include_context=key.include_context,
            all_vocab=key.all_vocab,
        )
        # Save deck content
-        deck_file = self.cache_dir / f"{key}.txt"
+        deck_file = self.cache_dir / f"{cache_key}.txt"
        deck_file.write_text(anki_content, encoding="utf-8")
        # Update metadata
        metadata = self._load_metadata()
-        metadata[key] = {
+        metadata[cache_key] = {
            "file_hash": file_hash,
-            "filepath": str(filepath),
+            "filepath": str(key.filepath),
-            "length": length,
+            "length": key.length,
-            "target_lang": target_lang,
+            "target_lang": key.target_lang,
-            "include_context": include_context,
+            "include_context": key.include_context,
-            "all_vocab": all_vocab,
+            "all_vocab": key.all_vocab,
            "excerpt": excerpt,
            "num_words": num_words,
            "max_rank": max_rank,
@ -519,34 +537,33 @@ class AnkiDeckCache:
 # Global Cache Instances
 # =============================================================================
-# Singleton instances
+class _CacheHolder:
-_translation_cache: TranslationCache | None = None
+    """Holds singleton cache instances."""
-_vocab_curve_cache: VocabCurveCache | None = None
+
-_anki_deck_cache: AnkiDeckCache | None = None
+    translation: TranslationCache | None = None
    vocab_curve: VocabCurveCache | None = None
    anki_deck: AnkiDeckCache | None = None
 def get_translation_cache() -> TranslationCache:
    """Get the global translation cache instance."""
-    global _translation_cache
+    if _CacheHolder.translation is None:
-    if _translation_cache is None:
+        _CacheHolder.translation = TranslationCache()
-        _translation_cache = TranslationCache()
+    return _CacheHolder.translation
    return _translation_cache
 def get_vocab_curve_cache() -> VocabCurveCache:
    """Get the global vocabulary curve cache instance."""
-    global _vocab_curve_cache
+    if _CacheHolder.vocab_curve is None:
-    if _vocab_curve_cache is None:
+        _CacheHolder.vocab_curve = VocabCurveCache()
-        _vocab_curve_cache = VocabCurveCache()
+    return _CacheHolder.vocab_curve
    return _vocab_curve_cache
 def get_anki_deck_cache() -> AnkiDeckCache:
    """Get the global Anki deck cache instance."""
-    global _anki_deck_cache
+    if _CacheHolder.anki_deck is None:
-    if _anki_deck_cache is None:
+        _CacheHolder.anki_deck = AnkiDeckCache()
-        _anki_deck_cache = AnkiDeckCache()
+    return _CacheHolder.anki_deck
    return _anki_deck_cache
 def clear_all_caches() -> None:
@ -575,8 +592,6 @@ def main() -> int:
    Returns:
        Exit code.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Manage word frequency caches")
    parser.add_argument("--stats", action="store_true", help="Show cache statistics")
    parser.add_argument("--clear", action="store_true", help="Clear all caches")
@ -594,42 +609,42 @@ def main() -> int:
    if args.clear:
        clear_all_caches()
-        print("All caches cleared.")
+        logger.info("All caches cleared.")
        return 0
    if args.clear_translations:
        get_translation_cache().clear()
-        print("Translation cache cleared.")
+        logger.info("Translation cache cleared.")
        return 0
    if args.clear_excerpts:
        get_vocab_curve_cache().clear()
-        print("Excerpt cache cleared.")
+        logger.info("Excerpt cache cleared.")
        return 0
    if args.clear_anki:
        get_anki_deck_cache().clear()
-        print("Anki deck cache cleared.")
+        logger.info("Anki deck cache cleared.")
        return 0
    # Default: show stats
    stats = get_all_cache_stats()
-    print("Cache Statistics")
+    logger.info("Cache Statistics")
-    print("=" * 50)
+    logger.info("=" * 50)
    for cache_name, cache_stats in stats.items():
-        print(f"\n{cache_name.upper()}:")
+        logger.info("\n%s:", cache_name.upper())
        for key, value in cache_stats.items():
            if key == "cache_size_bytes":
                # Format as human-readable
-                if value < 1024:
+                if value < _ONE_KB:
                    size_str = f"{value} B"
-                elif value < 1024 * 1024:
+                elif value < _ONE_MB:
-                    size_str = f"{value / 1024:.1f} KB"
+                    size_str = f"{value / _ONE_KB:.1f} KB"
                else:
-                    size_str = f"{value / (1024 * 1024):.1f} MB"
+                    size_str = f"{value / _ONE_MB:.1f} MB"
-                print(f"  {key}: {size_str}")
+                logger.info("  %s: %s", key, size_str)
            else:
-                print(f"  {key}: {value}")
+                logger.info("  %s: %s", key, value)
    return 0
--- a/python_pkg/word_frequency/cache.py.bak
+++ b/python_pkg/word_frequency/cache.py.bak
@ -0,0 +1,640 @@
 #!/usr/bin/env python3
 """Caching utilities for word frequency analysis.
 Provides disk-based caching for:
 - Translations (word -> translation mappings)
 - Vocabulary curve excerpts (file + length -> excerpt + words)
 - Generated Anki decks
 Cache location: ~/.cache/word_frequency/
 """
 from __future__ import annotations
 import hashlib
 import json
 import os
 from pathlib import Path
 from typing import Any
 # Default cache directory
 DEFAULT_CACHE_DIR = Path.home() / ".cache" / "word_frequency"
 def get_cache_dir() -> Path:
    """Get the cache directory, creating it if needed.

    The location is read from the ``WORD_FREQ_CACHE_DIR`` environment
    variable. When the variable is unset or empty, ``DEFAULT_CACHE_DIR``
    is used instead.

    Returns:
        Path to cache directory.
    """
    # An empty value must fall back to the default: Path("") is the current
    # working directory, so cache files would otherwise be written wherever
    # the tool happens to be started.
    configured = os.environ.get("WORD_FREQ_CACHE_DIR")
    cache_dir = Path(configured or DEFAULT_CACHE_DIR)
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir
 def get_file_hash(filepath: Path) -> str:
    """Return the SHA-256 hex digest of a file's bytes.

    The file is consumed in fixed-size blocks so that large inputs are
    never held in memory all at once.

    Args:
        filepath: Path to file.

    Returns:
        Hex digest of file hash.
    """
    block_size = 65536
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
 def get_text_hash(text: str) -> str:
    """Return the SHA-256 hex digest of a string.

    The text is encoded as UTF-8 before hashing.

    Args:
        text: Text to hash.

    Returns:
        Hex digest of text hash.
    """
    encoded = text.encode("utf-8")
    digest = hashlib.sha256()
    digest.update(encoded)
    return digest.hexdigest()
 # =============================================================================
 # Translation Cache
 # =============================================================================
 class TranslationCache:
    """Cache for word translations.

    Translations are kept in a single JSON file (``translations.json``)
    inside the cache directory. The file is read lazily on first access and
    is only rewritten when entries changed since the last save.
    """

    def __init__(self, cache_dir: Path | None = None) -> None:
        """Initialize translation cache.

        Args:
            cache_dir: Optional custom cache directory. When omitted, the
                directory returned by ``get_cache_dir()`` is used.
        """
        self.cache_dir = cache_dir or get_cache_dir()
        self.cache_file = self.cache_dir / "translations.json"
        self._cache: dict[str, str] | None = None
        self._dirty = False  # Track if cache needs saving

    def _load_cache(self) -> dict[str, str]:
        """Load cache from disk.

        A missing, unreadable, undecodable or non-mapping cache file is
        treated as an empty cache.

        Returns:
            The in-memory translation mapping.
        """
        if self._cache is None:
            loaded: Any = {}
            if self.cache_file.exists():
                try:
                    loaded = json.loads(
                        self.cache_file.read_text(encoding="utf-8")
                    )
                except (ValueError, OSError):
                    # ValueError covers both invalid JSON and invalid UTF-8.
                    loaded = {}
            # Valid JSON that is not an object (e.g. a list) cannot serve as
            # the key/value store; start over instead of failing on lookup.
            self._cache = loaded if isinstance(loaded, dict) else {}
        return self._cache

    def _save_cache(self) -> None:
        """Save cache to disk if dirty."""
        if self._cache is not None and self._dirty:
            # A caller-supplied cache_dir may not exist yet; create it so the
            # write below does not fail with FileNotFoundError.
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            self.cache_file.write_text(
                json.dumps(self._cache, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
            self._dirty = False

    def flush(self) -> None:
        """Force save cache to disk."""
        self._save_cache()

    @staticmethod
    def _make_key(word: str, source_lang: str, target_lang: str) -> str:
        """Create cache key for a translation.

        Args:
            word: Word to translate.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            Cache key string; the word is lowercased.
        """
        return f"{source_lang}:{target_lang}:{word.lower()}"

    def get(self, word: str, source_lang: str, target_lang: str) -> str | None:
        """Get cached translation.

        Args:
            word: Word to look up.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            Cached translation or None if not found.
        """
        cache = self._load_cache()
        key = self._make_key(word, source_lang, target_lang)
        return cache.get(key)

    def set(
        self,
        word: str,
        source_lang: str,
        target_lang: str,
        translation: str,
        *,
        auto_save: bool = False,
    ) -> None:
        """Store translation in cache.

        Args:
            word: Original word.
            source_lang: Source language code.
            target_lang: Target language code.
            translation: Translated word.
            auto_save: If True, save to disk immediately.
        """
        cache = self._load_cache()
        key = self._make_key(word, source_lang, target_lang)
        cache[key] = translation
        self._dirty = True
        if auto_save:
            self._save_cache()

    def get_many(
        self, words: list[str], source_lang: str, target_lang: str
    ) -> dict[str, str]:
        """Get multiple cached translations.

        Args:
            words: Words to look up.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            Dict mapping lowercased words to their cached translations.
            Words without a cached translation are omitted.
        """
        cache = self._load_cache()
        result: dict[str, str] = {}
        for word in words:
            key = self._make_key(word, source_lang, target_lang)
            if key in cache:
                result[word.lower()] = cache[key]
        return result

    def set_many(
        self,
        translations: dict[str, str],
        source_lang: str,
        target_lang: str,
    ) -> None:
        """Store multiple translations in cache and save to disk.

        Args:
            translations: Dict mapping words to translations.
            source_lang: Source language code.
            target_lang: Target language code.
        """
        cache = self._load_cache()
        for word, translation in translations.items():
            key = self._make_key(word, source_lang, target_lang)
            cache[key] = translation
        self._dirty = True
        self._save_cache()  # Save once after all additions

    def clear(self) -> None:
        """Clear all cached translations."""
        self._cache = {}
        self._dirty = False
        if self.cache_file.exists():
            self.cache_file.unlink()

    def stats(self) -> dict[str, Any]:
        """Get cache statistics.

        Returns:
            Dict with the entry count, the cache file path and the size of
            the cache file in bytes (0 when the file does not exist).
        """
        cache = self._load_cache()
        return {
            "total_entries": len(cache),
            "cache_file": str(self.cache_file),
            "cache_size_bytes": (
                self.cache_file.stat().st_size if self.cache_file.exists() else 0
            ),
        }
 # =============================================================================
 # Vocabulary Curve Cache
 # =============================================================================
 class VocabCurveCache:
    """Cache for vocabulary curve analysis results.

    Each entry is one JSON file in the ``excerpts`` subdirectory, named
    after a prefix of the source file's hash and the excerpt length.
    """

    def __init__(self, cache_dir: Path | None = None) -> None:
        """Initialize vocabulary curve cache.

        Args:
            cache_dir: Optional custom cache directory. When omitted, the
                directory returned by ``get_cache_dir()`` is used.
        """
        self.cache_dir = (cache_dir or get_cache_dir()) / "excerpts"
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_cache_path(self, file_hash: str, length: int) -> Path:
        """Get path to cache file for given hash and length.

        Args:
            file_hash: Hash of source file.
            length: Excerpt length.

        Returns:
            Path to cache file.
        """
        return self.cache_dir / f"{file_hash[:16]}_{length}.json"

    def get(
        self, filepath: Path, length: int
    ) -> tuple[str, list[tuple[str, int]]] | None:
        """Get cached excerpt and words for a file and length.

        An unreadable or malformed cache file is treated as a cache miss
        rather than an error.

        Args:
            filepath: Path to source file.
            length: Excerpt length.

        Returns:
            Tuple of (excerpt, words_with_ranks) or None if not cached.
        """
        file_hash = get_file_hash(filepath)
        cache_path = self._get_cache_path(file_hash, length)
        if not cache_path.exists():
            return None
        try:
            data = json.loads(cache_path.read_text(encoding="utf-8"))
            # Only a prefix of the hash is in the file name, so compare the
            # full stored hash before trusting the entry.
            if not isinstance(data, dict) or data.get("file_hash") != file_hash:
                return None
            excerpt = data["excerpt"]
            words = [(w, r) for w, r in data["words"]]
        except (ValueError, KeyError, TypeError, OSError):
            # ValueError: invalid JSON/UTF-8 or a word entry that is not a
            # pair; TypeError: "words" (or one of its items) is not iterable.
            return None
        return excerpt, words

    def set(
        self,
        filepath: Path,
        length: int,
        excerpt: str,
        words: list[tuple[str, int]],
    ) -> None:
        """Store excerpt and words in cache.

        Args:
            filepath: Path to source file.
            length: Excerpt length.
            excerpt: The excerpt text.
            words: List of (word, rank) tuples.
        """
        file_hash = get_file_hash(filepath)
        cache_path = self._get_cache_path(file_hash, length)
        data = {
            "file_hash": file_hash,
            "filepath": str(filepath),
            "length": length,
            "excerpt": excerpt,
            "words": [[w, r] for w, r in words],
        }
        cache_path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def clear(self) -> None:
        """Clear all cached excerpts."""
        for cache_file in self.cache_dir.glob("*.json"):
            cache_file.unlink()

    def stats(self) -> dict[str, Any]:
        """Get cache statistics.

        Returns:
            Dict with the number of cache files, the cache directory and the
            combined size of the cache files in bytes.
        """
        cache_files = list(self.cache_dir.glob("*.json"))
        total_size = sum(f.stat().st_size for f in cache_files)
        return {
            "total_entries": len(cache_files),
            "cache_dir": str(self.cache_dir),
            "cache_size_bytes": total_size,
        }
 # =============================================================================
 # Anki Deck Cache
 # =============================================================================
 class AnkiDeckCache:
    """Cache for generated Anki decks.

    Deck contents are stored as ``<key>.txt`` files in the ``anki_decks``
    subdirectory; per-deck details are stored together in
    ``metadata.json`` in the same directory.
    """

    def __init__(self, cache_dir: Path | None = None) -> None:
        """Initialize Anki deck cache.

        Args:
            cache_dir: Optional custom cache directory. When omitted, the
                directory returned by ``get_cache_dir()`` is used.
        """
        self.cache_dir = (cache_dir or get_cache_dir()) / "anki_decks"
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.metadata_file = self.cache_dir / "metadata.json"
        self._metadata: dict[str, Any] | None = None

    def _load_metadata(self) -> dict[str, Any]:
        """Load metadata from disk.

        A missing, unreadable, undecodable or non-mapping metadata file is
        treated as empty metadata.

        Returns:
            The in-memory metadata mapping.
        """
        if self._metadata is None:
            loaded: Any = {}
            if self.metadata_file.exists():
                try:
                    loaded = json.loads(
                        self.metadata_file.read_text(encoding="utf-8")
                    )
                except (ValueError, OSError):
                    # ValueError covers both invalid JSON and invalid UTF-8.
                    loaded = {}
            # Valid JSON that is not an object cannot be indexed by cache
            # key; start over instead of failing on lookup.
            self._metadata = loaded if isinstance(loaded, dict) else {}
        return self._metadata

    def _save_metadata(self) -> None:
        """Save metadata to disk."""
        if self._metadata is not None:
            self.metadata_file.write_text(
                json.dumps(self._metadata, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )

    @staticmethod
    def _make_key(
        file_hash: str,
        length: int,
        target_lang: str,
        include_context: bool,
        all_vocab: bool,
    ) -> str:
        """Create cache key for an Anki deck.

        Args:
            file_hash: Hash of source file.
            length: Excerpt length.
            target_lang: Target language.
            include_context: Whether context is included.
            all_vocab: Whether all vocab is included.

        Returns:
            Cache key string.
        """
        flags = f"ctx{int(include_context)}_all{int(all_vocab)}"
        return f"{file_hash[:16]}_{length}_{target_lang}_{flags}"

    def get(
        self,
        filepath: Path,
        length: int,
        target_lang: str,
        include_context: bool,
        all_vocab: bool,
    ) -> tuple[str, str, int, int] | None:
        """Get cached Anki deck.

        A metadata entry that is malformed or lacks required fields, or a
        deck file that cannot be read, is treated as a cache miss.

        Args:
            filepath: Path to source file.
            length: Excerpt length.
            target_lang: Target language.
            include_context: Whether context is included.
            all_vocab: Whether all vocab is included.

        Returns:
            Tuple of (anki_content, excerpt, num_words, max_rank) or None.
        """
        file_hash = get_file_hash(filepath)
        key = self._make_key(file_hash, length, target_lang, include_context, all_vocab)
        entry = self._load_metadata().get(key)
        # Only a prefix of the hash is in the key, so compare the full
        # stored hash before trusting the entry.
        if not isinstance(entry, dict) or entry.get("file_hash") != file_hash:
            return None
        deck_file = self.cache_dir / f"{key}.txt"
        if not deck_file.exists():
            return None
        try:
            content = deck_file.read_text(encoding="utf-8")
            return (
                content,
                entry["excerpt"],
                entry["num_words"],
                entry["max_rank"],
            )
        except (OSError, ValueError, KeyError):
            # ValueError: deck file is not valid UTF-8; KeyError: the
            # metadata entry is missing one of the stored fields.
            return None

    def set(
        self,
        filepath: Path,
        length: int,
        target_lang: str,
        include_context: bool,
        all_vocab: bool,
        anki_content: str,
        excerpt: str,
        num_words: int,
        max_rank: int,
    ) -> None:
        """Store Anki deck in cache.

        Args:
            filepath: Path to source file.
            length: Excerpt length.
            target_lang: Target language.
            include_context: Whether context is included.
            all_vocab: Whether all vocab is included.
            anki_content: The Anki deck content.
            excerpt: The excerpt text.
            num_words: Number of words in deck.
            max_rank: Maximum word rank.
        """
        file_hash = get_file_hash(filepath)
        key = self._make_key(file_hash, length, target_lang, include_context, all_vocab)
        # Save deck content
        deck_file = self.cache_dir / f"{key}.txt"
        deck_file.write_text(anki_content, encoding="utf-8")
        # Update metadata
        metadata = self._load_metadata()
        metadata[key] = {
            "file_hash": file_hash,
            "filepath": str(filepath),
            "length": length,
            "target_lang": target_lang,
            "include_context": include_context,
            "all_vocab": all_vocab,
            "excerpt": excerpt,
            "num_words": num_words,
            "max_rank": max_rank,
        }
        self._save_metadata()

    def clear(self) -> None:
        """Clear all cached decks."""
        self._metadata = {}
        for cache_file in self.cache_dir.glob("*.txt"):
            cache_file.unlink()
        if self.metadata_file.exists():
            self.metadata_file.unlink()

    def stats(self) -> dict[str, Any]:
        """Get cache statistics.

        Returns:
            Dict with the number of metadata entries, the cache directory
            and the combined size of the deck files in bytes.
        """
        metadata = self._load_metadata()
        cache_files = list(self.cache_dir.glob("*.txt"))
        total_size = sum(f.stat().st_size for f in cache_files)
        return {
            "total_entries": len(metadata),
            "cache_dir": str(self.cache_dir),
            "cache_size_bytes": total_size,
        }
 # =============================================================================
 # Global Cache Instances
 # =============================================================================
 # Singleton instances
 _translation_cache: TranslationCache | None = None
 _vocab_curve_cache: VocabCurveCache | None = None
 _anki_deck_cache: AnkiDeckCache | None = None
 def get_translation_cache() -> TranslationCache:
    """Return the module-level translation cache, creating it on first use."""
    global _translation_cache
    if _translation_cache is not None:
        return _translation_cache
    _translation_cache = TranslationCache()
    return _translation_cache
 def get_vocab_curve_cache() -> VocabCurveCache:
    """Return the module-level vocabulary curve cache, creating it on first use."""
    global _vocab_curve_cache
    if _vocab_curve_cache is not None:
        return _vocab_curve_cache
    _vocab_curve_cache = VocabCurveCache()
    return _vocab_curve_cache
 def get_anki_deck_cache() -> AnkiDeckCache:
    """Return the module-level Anki deck cache, creating it on first use."""
    global _anki_deck_cache
    if _anki_deck_cache is not None:
        return _anki_deck_cache
    _anki_deck_cache = AnkiDeckCache()
    return _anki_deck_cache
 def clear_all_caches() -> None:
    """Clear the translation, vocabulary curve and Anki deck caches, in that order."""
    cache_getters = (
        get_translation_cache,
        get_vocab_curve_cache,
        get_anki_deck_cache,
    )
    for fetch_cache in cache_getters:
        fetch_cache().clear()
 def get_all_cache_stats() -> dict[str, dict[str, Any]]:
    """Collect the statistics of every cache.

    Returns:
        Dict keyed by cache type ("translations", "vocab_curves",
        "anki_decks"), each holding that cache's stats dict.
    """
    report: dict[str, dict[str, Any]] = {}
    report["translations"] = get_translation_cache().stats()
    report["vocab_curves"] = get_vocab_curve_cache().stats()
    report["anki_decks"] = get_anki_deck_cache().stats()
    return report
 def main() -> int:
    """CLI for cache management.
    Returns:
        Exit code.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Manage word frequency caches")
    parser.add_argument("--stats", action="store_true", help="Show cache statistics")
    parser.add_argument("--clear", action="store_true", help="Clear all caches")
    parser.add_argument(
        "--clear-translations", action="store_true", help="Clear translation cache"
    )
    parser.add_argument(
        "--clear-excerpts", action="store_true", help="Clear excerpt cache"
    )
    parser.add_argument(
        "--clear-anki", action="store_true", help="Clear Anki deck cache"
    )
    args = parser.parse_args()
    if args.clear:
        clear_all_caches()
        print("All caches cleared.")
        return 0
    if args.clear_translations:
        get_translation_cache().clear()
        print("Translation cache cleared.")
        return 0
    if args.clear_excerpts:
        get_vocab_curve_cache().clear()
        print("Excerpt cache cleared.")
        return 0
    if args.clear_anki:
        get_anki_deck_cache().clear()
        print("Anki deck cache cleared.")
        return 0
    # Default: show stats
    stats = get_all_cache_stats()
    print("Cache Statistics")
    print("=" * 50)
    for cache_name, cache_stats in stats.items():
        print(f"\n{cache_name.upper()}:")
        for key, value in cache_stats.items():
            if key == "cache_size_bytes":
                # Format as human-readable
                if value < 1024:
                    size_str = f"{value} B"
                elif value < 1024 * 1024:
                    size_str = f"{value / 1024:.1f} KB"
                else:
                    size_str = f"{value / (1024 * 1024):.1f} MB"
                print(f"  {key}: {size_str}")
            else:
                print(f"  {key}: {value}")
    return 0
 if __name__ == "__main__":
    import sys
    sys.exit(main())
--- a/python_pkg/word_frequency/excerpt_finder.py
+++ b/python_pkg/word_frequency/excerpt_finder.py
@ -6,21 +6,28 @@ specified length (in words) where the target words appear most frequently.
 Usage:
    # From raw text with target words
-    python -m python_pkg.word_frequency.excerpt_finder --text "they went somewhere he and she and the guy" --words and the --length 3
+    python -m python_pkg.word_frequency.excerpt_finder \
        --text "they went somewhere he and she and the guy" \
        --words and the --length 3
    # From a file
-    python -m python_pkg.word_frequency.excerpt_finder --file path/to/file.txt --words the and of --length 10
+    python -m python_pkg.word_frequency.excerpt_finder \
        --file path/to/file.txt --words the and of --length 10
    # Target words from a file (one word per line)
-    python -m python_pkg.word_frequency.excerpt_finder --file text.txt --words-file targets.txt --length 20
+    python -m python_pkg.word_frequency.excerpt_finder \
        --file text.txt --words-file targets.txt --length 20
    # Show top N excerpts instead of just the best one
-    python -m python_pkg.word_frequency.excerpt_finder --file text.txt --words the and --length 10 --top 5
+    python -m python_pkg.word_frequency.excerpt_finder \
        --file text.txt --words the and --length 10 --top 5
 """
 from __future__ import annotations
 import argparse
 from dataclasses import dataclass
 import logging
 from pathlib import Path
 import sys
 from typing import TYPE_CHECKING, NamedTuple
@ -33,6 +40,17 @@ except ModuleNotFoundError:
 if TYPE_CHECKING:
    from collections.abc import Sequence
 logger = logging.getLogger(__name__)
@dataclass(frozen=True)
 class ExcerptSearchOptions:
    """Options for excerpt search and display.

    Immutable bundle of the optional settings accepted by
    find_best_excerpt_with_context, so callers pass one object
    instead of several keyword arguments.
    """
    # If False, words are matched case-insensitively.
    case_sensitive: bool = False
    # Number of top excerpts to return.
    top_n: int = 1
    # Number of words to include before/after each excerpt;
    # values <= 0 leave the excerpts unexpanded.
    context_words: int = 0
 class ExcerptResult(NamedTuple):
    """Result of an excerpt search."""
@ -141,45 +159,28 @@ def find_best_excerpt(
    return output
-def find_best_excerpt_with_context(
+def _expand_results_with_context(
    text: str,
-    target_words: Sequence[str],
+    base_results: list[ExcerptResult],
-    excerpt_length: int,
+    context_words: int,
    *,
    case_sensitive: bool = False,
    top_n: int = 1,
    context_words: int = 0,
 ) -> list[ExcerptResult]:
-    """Find the excerpt(s) with optional surrounding context.
+    """Expand excerpt results with surrounding context words.
    Args:
-        text: The input text to search.
+        text: The full source text.
-        target_words: Words to search for in the excerpt.
+        base_results: Results from find_best_excerpt.
-        excerpt_length: Length of the excerpt in words.
+        context_words: Number of words to include before/after.
-        case_sensitive: If False, match words case-insensitively.
+        case_sensitive: If False, words are lowercased.
        top_n: Number of top excerpts to return.
        context_words: Number of words to include before/after the excerpt.
    Returns:
-        List of ExcerptResult with context included in the excerpt.
+        Expanded ExcerptResult list with context.
    """
    base_results = find_best_excerpt(
        text,
        target_words,
        excerpt_length,
        case_sensitive=case_sensitive,
        top_n=top_n,
    )
    if context_words <= 0:
        return base_results
    # Re-extract all words to get context
    all_words = extract_words(text, case_sensitive=case_sensitive)
    expanded_results: list[ExcerptResult] = []
    for result in base_results:
        # Expand the excerpt with context
        ctx_start = max(0, result.start_index - context_words)
        ctx_end = min(len(all_words), result.end_index + context_words)
        context_excerpt_words = all_words[ctx_start:ctx_end]
@ -198,6 +199,40 @@ def find_best_excerpt_with_context(
    return expanded_results
 def find_best_excerpt_with_context(
    text: str,
    target_words: Sequence[str],
    excerpt_length: int,
    options: ExcerptSearchOptions | None = None,
 ) -> list[ExcerptResult]:
    """Locate the best excerpt(s) and optionally widen them with context.

    Args:
        text: The input text to search.
        target_words: Words to search for in the excerpt.
        excerpt_length: Length of the excerpt in words.
        options: Search options (case_sensitive, top_n, context_words).
            Default options are used when omitted.

    Returns:
        List of ExcerptResult; when ``context_words`` is positive each
        result is expanded with the surrounding words.
    """
    settings = options if options else ExcerptSearchOptions()

    found = find_best_excerpt(
        text,
        target_words,
        excerpt_length,
        case_sensitive=settings.case_sensitive,
        top_n=settings.top_n,
    )

    # Only pay for the context expansion when context was requested.
    if settings.context_words > 0:
        return _expand_results_with_context(
            text,
            found,
            settings.context_words,
            case_sensitive=settings.case_sensitive,
        )
    return found
 def format_excerpt_results(
    results: list[ExcerptResult],
    target_words: Sequence[str],
@ -224,7 +259,8 @@ def format_excerpt_results(
        lines.append(f'Excerpt: "{result.excerpt}"')
        lines.append(f"Word position: {result.start_index} - {result.end_index - 1}")
        lines.append(
-            f"Matches: {result.match_count}/{len(result.words)} ({result.match_percentage:.2f}%)"
+            f"Matches: {result.match_count}/{len(result.words)}"
            f" ({result.match_percentage:.2f}%)"
        )
        lines.append("")
@ -316,10 +352,7 @@ def main(argv: Sequence[str] | None = None) -> int:
    try:
        # Get input text
-        if args.text:
+        text = args.text or read_file(args.file)
            text = args.text
        else:
            text = read_file(args.file)
        # Get target words
        if args.words:
@ -329,7 +362,7 @@ def main(argv: Sequence[str] | None = None) -> int:
            target_words = [w.strip() for w in words_content.splitlines() if w.strip()]
        if not target_words:
-            print("Error: No target words provided", file=sys.stderr)
+            logger.error("No target words provided")
            return 1
        # Find excerpts
@ -337,9 +370,11 @@ def main(argv: Sequence[str] | None = None) -> int:
            text,
            target_words,
            args.length,
-            case_sensitive=args.case_sensitive,
+            ExcerptSearchOptions(
-            top_n=args.top,
+                case_sensitive=args.case_sensitive,
-            context_words=args.context,
+                top_n=args.top,
                context_words=args.context,
            ),
        )
        # Format and print results
@ -347,15 +382,15 @@ def main(argv: Sequence[str] | None = None) -> int:
        if args.output:
            Path(args.output).write_text(output, encoding="utf-8")
-            print(f"Output written to {args.output}")
+            logger.info("Output written to %s", args.output)
        else:
-            print(output)
+            logger.info("%s", output)
-    except FileNotFoundError as e:
+    except FileNotFoundError:
-        print(f"Error: File not found - {e}", file=sys.stderr)
+        logger.exception("File not found")
        return 1
-    except UnicodeDecodeError as e:
+    except UnicodeDecodeError:
-        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)
+        logger.exception("Could not decode file as UTF-8")
        return 1
    return 0
--- a/python_pkg/word_frequency/learning_pipe.py
+++ b/python_pkg/word_frequency/learning_pipe.py
@ -1,7 +1,8 @@
 #!/usr/bin/env python3
-"""Learning pipe - combines word frequency analysis with excerpt finding for language learning.
+r"""Learning pipe - combines word frequency analysis with excerpt finding.
 Helps language learners by:
 This script helps language learners by:
 1. Analyzing a text to find the most common words
 2. Finding excerpts where those common words are most prevalent
 3. Creating a progressive learning experience in batches
@ -11,26 +12,35 @@ The idea is to:
 - Then read excerpts that are dense with those words
 - Progressively learn more words and more complex excerpts
-Usage:
+Usage::
-    # Basic usage - get top 20 words and find excerpts with them
+
-    python -m python_pkg.word_frequency.learning_pipe --file text.txt
+    # Basic usage
    python -m python_pkg.word_frequency.learning_pipe \\
        --file text.txt
    # Custom batch size and excerpt length
-    python -m python_pkg.word_frequency.learning_pipe --file text.txt --batch-size 30 --excerpt-length 50
+    python -m python_pkg.word_frequency.learning_pipe \\
        --file text.txt --batch-size 30 --excerpt-length 50
    # Multiple batches for progressive learning
-    python -m python_pkg.word_frequency.learning_pipe --file text.txt --batches 5 --batch-size 20
+    python -m python_pkg.word_frequency.learning_pipe \\
        --file text.txt --batches 5 --batch-size 20
    # Output to file
-    python -m python_pkg.word_frequency.learning_pipe --file text.txt --output lesson.txt
+    python -m python_pkg.word_frequency.learning_pipe \\
        --file text.txt --output lesson.txt
-    # Skip common words (like "the", "a", "is") using a stopwords file
+    # Skip common words using a stopwords file
-    python -m python_pkg.word_frequency.learning_pipe --file text.txt --stopwords stopwords.txt
+    python -m python_pkg.word_frequency.learning_pipe \\
        --file text.txt --stopwords stopwords.txt
 """
 from __future__ import annotations
 import argparse
 from dataclasses import dataclass
 from dataclasses import replace as _replace_dc
 import logging
 from pathlib import Path
 import sys
 from typing import TYPE_CHECKING
@ -53,6 +63,8 @@ except ModuleNotFoundError:
 if TYPE_CHECKING:
    from collections.abc import Sequence
 logger = logging.getLogger(__name__)
 # Common stopwords for various languages (can be overridden with --stopwords)
 DEFAULT_STOPWORDS_EN = frozenset(
@ -181,57 +193,210 @@ def load_stopwords(filepath: str | Path | None) -> frozenset[str]:
    )
@dataclass(frozen=True)
 class LessonConfig:
    """Configuration for learning lesson generation.

    Immutable bundle of the settings consumed by
    generate_learning_lesson and its helpers.
    """
    # Number of words per learning batch.
    batch_size: int = 20
    # Number of batches to generate.
    num_batches: int = 1
    # Length of each practice excerpt, in words.
    excerpt_length: int = 30
    # Number of excerpts to find per batch.
    excerpts_per_batch: int = 3
    # Custom stopwords to skip (in addition to the defaults).
    stopwords: frozenset[str] | None = None
    # If True, the default English stopwords are not filtered out.
    skip_default_stopwords: bool = False
    # If True, purely numeric words are filtered out.
    skip_numbers: bool = True
    # If True, words are treated case-sensitively.
    case_sensitive: bool = False
    # Source language code for translation; "auto" requests detection.
    translate_from: str | None = None
    # Target language code for translation (e.g. 'en').
    translate_to: str | None = None
 def _resolve_stopwords(config: LessonConfig) -> frozenset[str]:
    """Build the effective stopword set for a lesson.

    Args:
        config: Lesson configuration supplying the custom stopwords and
            the ``skip_default_stopwords`` switch.

    Returns:
        The custom stopwords alone when the defaults are skipped,
        otherwise the defaults merged with the custom stopwords.
    """
    custom = config.stopwords or frozenset()
    if config.skip_default_stopwords:
        return custom
    return DEFAULT_STOPWORDS_EN | custom
 def _detect_translation_language(
    text: str,
    config: LessonConfig,
    lines: list[str],
 ) -> tuple[str | None, str | None]:
    """Work out the (source, target) language pair for translation.

    Args:
        text: Source text handed to language detection when needed.
        config: Lesson configuration holding the requested languages.
        lines: Output lines; a detection note or warning is appended
            in place when detection runs.

    Returns:
        Tuple of (source language, target language). The source is None
        when detection was needed but failed; the target falls back to
        "en" when the config does not set one.
    """
    source_lang = config.translate_from
    target_lang = config.translate_to or "en"

    # Detection runs for an explicit "auto" source, or when only a
    # target language was configured.
    needs_detection = source_lang == "auto" or (
        config.translate_to and not source_lang
    )
    if not needs_detection:
        return source_lang, target_lang

    detected = detect_language(text)
    if not detected:
        lines.append(
            "Warning: Could not detect language "
            "(install langdetect: "
            "pip install langdetect)"
        )
        return None, target_lang

    lines.append(f"Detected language: {detected}")
    return detected, target_lang
 def _format_word_list(
    batch_words: list[tuple[str, int]],
    start_idx: int,
    total_words: int,
    translations: dict[str, str],
 ) -> list[str]:
    """Format the vocabulary word list for a batch."""
    lines: list[str] = []
    for i, (word, count) in enumerate(
        batch_words, start=start_idx + 1,
    ):
        percentage = (count / total_words) * 100
        if translations:
            trans = translations.get(word, "?")
            lines.append(
                f"  {i:3}. {word:<20} -> {trans:<20}"
                f" ({count:,} occurrences, "
                f"{percentage:.2f}%)"
            )
        else:
            lines.append(
                f"  {i:3}. {word:<20}"
                f" ({count:,} occurrences, "
                f"{percentage:.2f}%)"
            )
    return lines
@dataclass(frozen=True)
 class _LessonContext:
    """Shared context for batch generation.

    Groups the values every batch section needs so they can be passed
    to _generate_batch_section as a single argument.
    """
    # Full source text that practice excerpts are searched in.
    text: str
    # Mapping of word -> occurrence count for the source text.
    word_counts: dict[str, int]
    # Lesson configuration used when rendering each batch.
    config: LessonConfig
 def _generate_batch_section(
    ctx: _LessonContext,
    batch_num: int,
    batch_words: list[tuple[str, int]],
    cumulative_words: list[str],
 ) -> list[str]:
    """Build the output lines for one learning batch.

    A section consists of a header, the vocabulary list (with
    translations when both languages are configured), the cumulative
    text coverage and the practice excerpts.

    Args:
        ctx: Shared lesson context (text, word counts, config).
        batch_num: Zero-based index of the batch.
        batch_words: (word, occurrence count) pairs in this batch.
        cumulative_words: All words learned up to and including this
            batch; used for the coverage figure and excerpt search.

    Returns:
        The lines making up the batch section.
    """
    cfg = ctx.config
    counts = ctx.word_counts
    corpus_size = sum(counts.values())
    first = batch_num * cfg.batch_size
    # A short final batch must not advertise words that do not exist.
    last = min(first + cfg.batch_size, first + len(batch_words))
    rule = "-" * 70

    section: list[str] = [
        rule,
        f"BATCH {batch_num + 1}: Words {first + 1} - {last}",
        rule,
        "",
    ]

    # Translate only when both languages are known; failed lookups are
    # dropped so the word list can show "?" for them.
    glosses: dict[str, str] = {}
    if cfg.translate_from is not None and cfg.translate_to is not None:
        outcomes = translate_words_batch(
            [word for word, _ in batch_words],
            cfg.translate_from,
            cfg.translate_to,
        )
        glosses = {
            outcome.source_word: outcome.translated_word
            for outcome in outcomes
            if outcome.success
        }

    section += ["VOCABULARY TO LEARN:", ""]
    section += _format_word_list(batch_words, first, corpus_size, glosses)
    section.append("")

    # Share of the whole text covered by everything learned so far.
    known = sum(counts[w] for w in cumulative_words if w in counts)
    coverage = (known / corpus_size) * 100
    section += [
        "After learning these words, "
        f"you'll recognize ~{coverage:.1f}% of the text",
        "",
        "PRACTICE EXCERPTS:",
        "(Excerpts where your learned vocabulary "
        "is most concentrated)",
        "",
    ]

    best = find_best_excerpt(
        ctx.text,
        cumulative_words,
        cfg.excerpt_length,
        case_sensitive=cfg.case_sensitive,
        top_n=cfg.excerpts_per_batch,
    )
    for number, hit in enumerate(best, 1):
        section += [
            f"  Excerpt {number} "
            f"({hit.match_percentage:.1f}% known words):",
            f'  "{hit.excerpt}"',
            "",
        ]

    return section
 def generate_learning_lesson(
    text: str,
-    *,
+    config: LessonConfig | None = None,
    batch_size: int = 20,
    num_batches: int = 1,
    excerpt_length: int = 30,
    excerpts_per_batch: int = 3,
    stopwords: frozenset[str] | None = None,
    skip_default_stopwords: bool = False,
    skip_numbers: bool = True,
    case_sensitive: bool = False,
    context_words: int = 5,
    translate_from: str | None = None,
    translate_to: str | None = None,
 ) -> str:
    """Generate a learning lesson from text.
    Args:
        text: The source text to analyze.
-        batch_size: Number of words per learning batch.
+        config: Lesson configuration. Uses defaults if None.
        num_batches: Number of batches to generate.
        excerpt_length: Length of each excerpt in words.
        excerpts_per_batch: Number of excerpts to find per batch.
        stopwords: Custom stopwords to skip (in addition to defaults).
        skip_default_stopwords: If True, don't filter out default English stopwords.
        skip_numbers: If True, filter out numeric words (default: True).
        case_sensitive: If True, treat words case-sensitively.
        context_words: Words of context to include around excerpts.
        translate_from: Source language code for translation (e.g., 'la', 'pl').
        translate_to: Target language code for translation (e.g., 'en').
    Returns:
        Formatted learning lesson as a string.
    """
-    # Combine stopwords
+    if config is None:
-    all_stopwords: frozenset[str]
+        config = LessonConfig()
    if skip_default_stopwords:
        all_stopwords = stopwords or frozenset()
    else:
        all_stopwords = DEFAULT_STOPWORDS_EN | (stopwords or frozenset())
-    # Analyze text for word frequencies
+    all_stopwords = _resolve_stopwords(config)
-    word_counts = analyze_text(text, case_sensitive=case_sensitive)
+    word_counts = analyze_text(
        text, case_sensitive=config.case_sensitive,
    )
    # Filter out stopwords and get sorted words
    filtered_words = [
        (word, count)
        for word, count in word_counts.most_common()
        if word.lower() not in all_stopwords
        and len(word) > 1
-        and not (skip_numbers and word.isdigit())
+        and not (config.skip_numbers and word.isdigit())
    ]
    total_words = sum(word_counts.values())
@ -241,125 +406,62 @@ def generate_learning_lesson(
    lines.append("LANGUAGE LEARNING LESSON")
    lines.append("=" * 70)
    lines.append(
-        f"Source text: {total_words:,} total words, {len(word_counts):,} unique words"
+        f"Source text: {total_words:,} total words, "
        f"{len(word_counts):,} unique words"
    )
    if all_stopwords:
        lines.append(
-            f"After filtering {len(all_stopwords)} stopwords: {len(filtered_words):,} vocabulary words"
+            f"After filtering {len(all_stopwords)} "
            f"stopwords: {len(filtered_words):,} "
            "vocabulary words"
        )
    else:
-        lines.append(f"Vocabulary words: {len(filtered_words):,}")
+        lines.append(
            f"Vocabulary words: {len(filtered_words):,}",
        )
-    # Handle translation setup
+    actual_from, actual_to = _detect_translation_language(
-    actual_translate_from = translate_from
+        text, config, lines,
-    actual_translate_to = translate_to or "en"  # Default to English
+    )
-
+    do_translate = (
-    # Auto-detect language if translation is enabled but source not specified
+        actual_from is not None and actual_to is not None
-    if translate_from == "auto" or (translate_to and not translate_from):
+    )
        detected = detect_language(text)
        if detected:
            actual_translate_from = detected
            lines.append(f"Detected language: {detected}")
            # Note: langdetect doesn't support Latin (often detected as Italian)
            # If detection seems wrong, use --translate-from to override
        else:
            lines.append(
                "Warning: Could not detect language "
                "(install langdetect: pip install langdetect)"
            )
            actual_translate_from = None
    do_translate = actual_translate_from is not None and actual_translate_to is not None
    if do_translate:
-        lines.append(f"Translation: {actual_translate_from} -> {actual_translate_to}")
+        lines.append(
-
+            f"Translation: {actual_from} -> {actual_to}",
        )
    lines.append("")
-    # Generate batches
+    # Create resolved config with detected translation
    resolved_config = _replace_dc(
        config,
        translate_from=actual_from,
        translate_to=actual_to,
    )
    ctx = _LessonContext(
        text=text,
        word_counts=word_counts,
        config=resolved_config,
    )
    cumulative_words: list[str] = []
-
+    for batch_num in range(config.num_batches):
-    for batch_num in range(num_batches):
+        start_idx = batch_num * config.batch_size
-        start_idx = batch_num * batch_size
+        end_idx = start_idx + config.batch_size
        end_idx = start_idx + batch_size
        if start_idx >= len(filtered_words):
            break
        batch_words = filtered_words[start_idx:end_idx]
        cumulative_words.extend(word for word, _ in batch_words)
-        lines.append("-" * 70)
+        lines.extend(
-        lines.append(
+            _generate_batch_section(
-            f"BATCH {batch_num + 1}: Words {start_idx + 1} - {min(end_idx, len(filtered_words))}"
+                ctx,
-        )
+                batch_num,
-        lines.append("-" * 70)
+                batch_words,
-        lines.append("")
+                cumulative_words,
        # Get translations if requested
        translations: dict[str, str] = {}
        if do_translate:
            words_to_translate = [word for word, _ in batch_words]
            translation_results = translate_words_batch(
                words_to_translate,
                actual_translate_from,  # type: ignore[arg-type]
                actual_translate_to,  # type: ignore[arg-type]
            )
            translations = {
                r.source_word: r.translated_word
                for r in translation_results
                if r.success
            }
        # Word list with frequencies
        lines.append("VOCABULARY TO LEARN:")
        lines.append("")
        if do_translate and translations:
            # Include translations in output
            for i, (word, count) in enumerate(batch_words, start=start_idx + 1):
                percentage = (count / total_words) * 100
                trans = translations.get(word, "?")
                lines.append(
                    f"  {i:3}. {word:<20} -> {trans:<20} ({count:,} occurrences, {percentage:.2f}%)"
                )
        else:
            for i, (word, count) in enumerate(batch_words, start=start_idx + 1):
                percentage = (count / total_words) * 100
                lines.append(
                    f"  {i:3}. {word:<20} ({count:,} occurrences, {percentage:.2f}%)"
                )
        lines.append("")
        # Calculate cumulative coverage
        cumulative_count = sum(
            word_counts[word] for word in cumulative_words if word in word_counts
        )
        coverage = (cumulative_count / total_words) * 100
        lines.append(
            f"After learning these words, you'll recognize ~{coverage:.1f}% of the text"
        )
        lines.append("")
        # Find excerpts using cumulative words
        lines.append("PRACTICE EXCERPTS:")
        lines.append("(Excerpts where your learned vocabulary is most concentrated)")
        lines.append("")
        excerpts = find_best_excerpt(
            text,
            cumulative_words,
            excerpt_length,
            case_sensitive=case_sensitive,
            top_n=excerpts_per_batch,
        )
        for j, excerpt in enumerate(excerpts, 1):
            lines.append(
                f"  Excerpt {j} ({excerpt.match_percentage:.1f}% known words):"
            )
            lines.append(f'  "{excerpt.excerpt}"')
            lines.append("")
    # Summary
    lines.append("=" * 70)
@ -368,14 +470,25 @@ def generate_learning_lesson(
    if cumulative_words:
        final_coverage = sum(
-            word_counts[word] for word in cumulative_words if word in word_counts
+            word_counts[w]
            for w in cumulative_words
            if w in word_counts
        )
-        final_percentage = (final_coverage / total_words) * 100
+        final_pct = (final_coverage / total_words) * 100
-        lines.append(f"Total vocabulary words learned: {len(cumulative_words)}")
+        lines.append(
-        lines.append(f"Text coverage: {final_percentage:.1f}%")
+            "Total vocabulary words learned: "
            f"{len(cumulative_words)}"
        )
        lines.append(f"Text coverage: {final_pct:.1f}%")
        lines.append("")
-        lines.append("TIP: Focus on understanding the excerpts first, then read")
+        lines.append(
-        lines.append("more of the original text as your vocabulary grows!")
+            "TIP: Focus on understanding the excerpts "
            "first, then read"
        )
        lines.append(
            "more of the original text as your "
            "vocabulary grows!"
        )
    return "\n".join(lines)
@ -475,7 +588,10 @@ def main(argv: Sequence[str] | None = None) -> int:
        "--translate-from",
        type=str,
        metavar="LANG",
-        help="Source language code (e.g., 'la', 'pl', 'de'). If omitted, auto-detected.",
+        help=(
            "Source language code (e.g., 'la', 'pl'). "
            "If omitted, auto-detected."
        ),
    )
    parser.add_argument(
        "--translate-to",
@ -496,27 +612,22 @@ def main(argv: Sequence[str] | None = None) -> int:
    args = parser.parse_args(argv)
    try:
-        # Get input text
+        text = args.text or read_file(args.file)
        if args.text:
            text = args.text
        else:
            text = read_file(args.file)
        # Load custom stopwords if provided
        custom_stopwords = load_stopwords(args.stopwords)
        # Determine translation settings
        # Translation enabled by default, --no-translate disables it
        translate_from: str | None = None
        translate_to: str | None = None
        if not args.no_translate:
-            translate_from = args.translate_from or "auto"  # "auto" triggers detection
+            translate_from = (
                args.translate_from or "auto"
            )
            translate_to = args.translate_to
-        # Generate lesson
+        config = LessonConfig(
        lesson = generate_learning_lesson(
            text,
            batch_size=args.batch_size,
            num_batches=args.batches,
            excerpt_length=args.excerpt_length,
@ -528,19 +639,26 @@ def main(argv: Sequence[str] | None = None) -> int:
            translate_from=translate_from,
            translate_to=translate_to,
        )
        lesson = generate_learning_lesson(text, config)
        # Output
        if args.output:
-            Path(args.output).write_text(lesson, encoding="utf-8")
+            Path(args.output).write_text(
-            print(f"Lesson written to {args.output}")
+                lesson, encoding="utf-8",
            )
            logger.info(
                "Lesson written to %s", args.output,
            )
        else:
-            print(lesson)
+            logger.info(lesson)
-    except FileNotFoundError as e:
+    except FileNotFoundError:
-        print(f"Error: File not found - {e}", file=sys.stderr)
+        logger.exception("Error: File not found")
        return 1
-    except UnicodeDecodeError as e:
+    except UnicodeDecodeError:
-        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)
+        logger.exception(
            "Error: Could not decode file as UTF-8",
        )
        return 1
    return 0
--- a/python_pkg/word_frequency/tests/test_analyzer.py
+++ b/python_pkg/word_frequency/tests/test_analyzer.py
@ -3,8 +3,11 @@
 from __future__ import annotations
 from collections import Counter
 from pathlib import Path
 import time
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from pathlib import Path
 import pytest
@ -251,12 +254,13 @@ class TestMain:
        assert exit_code == 0
        assert "Unique words: 3" in captured.out
-    def test_file_not_found_error(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_file_not_found_error(
        self, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Test error handling for missing file."""
        exit_code = main(["--file", "/nonexistent/file.txt"])
        captured = capsys.readouterr()
        assert exit_code == 1
-        assert "Error" in captured.err
+        assert "File not found" in caplog.text
 class TestPerformance:
@ -283,7 +287,7 @@ class TestPerformance:
        assert elapsed < 10.0, f"Analysis took {elapsed:.2f}s, expected < 10s"
        assert "word0" in result  # Most common word should be present
-    def test_bible_sized_text_performance(self, tmp_path: Path) -> None:
+    def test_bible_sized_text_performance(self) -> None:
        """Test with Bible-sized text (~800k words)."""
        # Generate text similar in size to the Bible
        base_words = ["the", "and", "of", "to", "in", "a", "that", "is", "was", "for"]
--- a/python_pkg/word_frequency/tests/test_anki_generator.py
+++ b/python_pkg/word_frequency/tests/test_anki_generator.py
@ -10,6 +10,7 @@ import pytest
 try:
    from python_pkg.word_frequency.anki_generator import (
        DeckInput,
        find_word_contexts,
        generate_anki_deck,
        main,
@ -20,6 +21,7 @@ except ImportError:
    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
    from python_pkg.word_frequency.anki_generator import (
        DeckInput,
        find_word_contexts,
        generate_anki_deck,
        main,
@ -77,7 +79,7 @@ class TestParseVocabularyCurveOutput:
    def test_parse_length_1(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for length 1."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+        excerpt, excerpt_words, _all_vocab = parse_vocabulary_curve_output(
            sample_vocabulary_output, 1
        )
        assert excerpt == "the"
@ -85,7 +87,7 @@ class TestParseVocabularyCurveOutput:
    def test_parse_length_2(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for length 2."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+        excerpt, excerpt_words, _all_vocab = parse_vocabulary_curve_output(
            sample_vocabulary_output, 2
        )
        assert excerpt == "the dog"
@ -93,7 +95,7 @@ class TestParseVocabularyCurveOutput:
    def test_parse_length_3(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for length 3."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+        excerpt, excerpt_words, _all_vocab = parse_vocabulary_curve_output(
            sample_vocabulary_output, 3
        )
        assert excerpt == "the quick fox"
@ -104,7 +106,7 @@ class TestParseVocabularyCurveOutput:
    def test_parse_nonexistent_length(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for non-existent length."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+        excerpt, excerpt_words, _all_vocab = parse_vocabulary_curve_output(
            sample_vocabulary_output, 100
        )
        assert excerpt == ""
@ -121,7 +123,7 @@ hello;1
 world;2
 VOCAB_DUMP_END
 """
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(output, 2)
+        _excerpt, _excerpt_words, all_vocab = parse_vocabulary_curve_output(output, 2)
        assert all_vocab == [("hello", 1), ("world", 2)]
@ -168,10 +170,12 @@ class TestGenerateAnkiDeck:
                MagicMock(success=True, source_word="hello", translated_word="hola")
            ]
            result = generate_anki_deck(
-                [("hello", 1)],
+                DeckInput(
-                source_lang="en",
+                    words_with_ranks=[("hello", 1)],
-                target_lang="es",
+                    source_lang="en",
-                deck_name="TestDeck",
+                    target_lang="es",
                    deck_name="TestDeck",
                ),
            )
        assert "#separator:semicolon" in result
@ -188,9 +192,11 @@ class TestGenerateAnkiDeck:
                MagicMock(success=True, source_word="world", translated_word="mundo"),
            ]
            result = generate_anki_deck(
-                [("hello", 1), ("world", 2)],
+                DeckInput(
-                source_lang="en",
+                    words_with_ranks=[("hello", 1), ("world", 2)],
-                target_lang="es",
+                    source_lang="en",
                    target_lang="es",
                ),
            )
        # Check that words and translations are present
@ -208,9 +214,11 @@ class TestGenerateAnkiDeck:
                MagicMock(success=True, source_word="test", translated_word="prueba")
            ]
            result = generate_anki_deck(
-                [("test", 42)],
+                DeckInput(
-                source_lang="en",
+                    words_with_ranks=[("test", 42)],
-                target_lang="es",
+                    source_lang="en",
                    target_lang="es",
                ),
            )
        assert "#42" in result
@ -226,9 +234,11 @@ class TestGenerateAnkiDeck:
                )
            ]
            result = generate_anki_deck(
-                [("test;word", 1)],
+                DeckInput(
-                source_lang="en",
+                    words_with_ranks=[("test;word", 1)],
-                target_lang="es",
+                    source_lang="en",
                    target_lang="es",
                ),
            )
        # Semicolons should be replaced with commas
@ -244,10 +254,12 @@ class TestGenerateAnkiDeck:
            ]
            contexts = {"hello": "...say hello to..."}
            result = generate_anki_deck(
-                [("hello", 1)],
+                DeckInput(
-                source_lang="en",
+                    words_with_ranks=[("hello", 1)],
-                target_lang="es",
+                    source_lang="en",
-                contexts=contexts,
+                    target_lang="es",
                    contexts=contexts,
                ),
                include_context=True,
            )
@ -257,9 +269,11 @@ class TestGenerateAnkiDeck:
    def test_no_translate_flag(self) -> None:
        """Test that no_translate skips translation."""
        result = generate_anki_deck(
-            [("hello", 1), ("world", 2)],
+            DeckInput(
-            source_lang="en",
+                words_with_ranks=[("hello", 1), ("world", 2)],
-            target_lang="es",
+                source_lang="en",
                target_lang="es",
            ),
            no_translate=True,
        )
@ -280,7 +294,7 @@ class TestMain:
        result = main(["--file", "nonexistent.txt", "--length", "10"])
        assert result == 1
-    def test_help_flag(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_help_flag(self) -> None:
        """Test that --help works."""
        with pytest.raises(SystemExit) as exc_info:
            main(["--help"])
@ -309,7 +323,7 @@ class TestIntegration:
        ) as mock_translate:
            # Mock translation to avoid network calls
            def mock_translate_fn(
-                words: list[str], from_lang: str, to_lang: str
+                words: list[str], _from_lang: str, _to_lang: str
            ) -> list[MagicMock]:
                return [
                    MagicMock(success=True, source_word=w, translated_word=f"[{w}]")
@ -324,6 +338,8 @@ class TestIntegration:
                    str(sample_text_file),
                    "--length",
                    "5",
                    "--from",
                    "en",
                    "--output",
                    str(output_file),
                    "--quiet",
@ -337,9 +353,11 @@ class TestIntegration:
        assert "#separator:semicolon" in content
    def test_cli_with_sample_file(
-        self, sample_text_file: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+        self, sample_text_file: Path, tmp_path: Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Test CLI with actual file."""
        import logging
        from python_pkg.word_frequency.anki_generator import C_EXECUTABLE
        if not C_EXECUTABLE.exists():
@ -347,9 +365,12 @@ class TestIntegration:
        output_file = tmp_path / "anki_output.txt"
-        with patch(
+        with (
-            "python_pkg.word_frequency.anki_generator.translate_words_batch"
+            caplog.at_level(logging.INFO),
-        ) as mock_translate:
+            patch(
                "python_pkg.word_frequency.anki_generator.translate_words_batch"
            ) as mock_translate,
        ):
            mock_translate.return_value = [
                MagicMock(success=True, source_word="the", translated_word="le")
            ]
@ -360,14 +381,15 @@ class TestIntegration:
                    str(sample_text_file),
                    "--length",
                    "1",
                    "--from",
                    "en",
                    "--output",
                    str(output_file),
                ]
            )
        assert result == 0
-        captured = capsys.readouterr()
+        assert "FLASHCARD GENERATION COMPLETE" in caplog.text
        assert "FLASHCARD GENERATION COMPLETE" in captured.out
 if __name__ == "__main__":
--- a/python_pkg/word_frequency/tests/test_excerpt_finder.py
+++ b/python_pkg/word_frequency/tests/test_excerpt_finder.py
@ -2,13 +2,18 @@
 from __future__ import annotations
-from pathlib import Path
+import logging
 import time
 from typing import TYPE_CHECKING
 import pytest
 if TYPE_CHECKING:
    from pathlib import Path
 from python_pkg.word_frequency.excerpt_finder import (
    ExcerptResult,
    ExcerptSearchOptions,
    find_best_excerpt,
    find_best_excerpt_with_context,
    format_excerpt_results,
@ -146,7 +151,8 @@ class TestFindBestExcerptWithContext:
        """Test with zero context (should behave like find_best_excerpt)."""
        text = "a b c d e f g"
        result = find_best_excerpt_with_context(
-            text, ["c"], excerpt_length=1, context_words=0
+            text, ["c"], excerpt_length=1,
            options=ExcerptSearchOptions(context_words=0),
        )
        assert result[0].excerpt == "c"
@ -155,7 +161,8 @@ class TestFindBestExcerptWithContext:
        """Test with context words."""
        text = "a b c d e f g"
        result = find_best_excerpt_with_context(
-            text, ["d"], excerpt_length=1, context_words=2
+            text, ["d"], excerpt_length=1,
            options=ExcerptSearchOptions(context_words=2),
        )
        # "d" at index 3, with context should include 2 words before and after
@ -167,7 +174,8 @@ class TestFindBestExcerptWithContext:
        """Test context doesn't go before start of text."""
        text = "a b c d e"
        result = find_best_excerpt_with_context(
-            text, ["a"], excerpt_length=1, context_words=3
+            text, ["a"], excerpt_length=1,
            options=ExcerptSearchOptions(context_words=3),
        )
        # Can't go before "a", so just get words after
@ -178,7 +186,8 @@ class TestFindBestExcerptWithContext:
        """Test context doesn't go beyond end of text."""
        text = "a b c d e"
        result = find_best_excerpt_with_context(
-            text, ["e"], excerpt_length=1, context_words=3
+            text, ["e"], excerpt_length=1,
            options=ExcerptSearchOptions(context_words=3),
        )
        # Can't go beyond "e"
@ -240,33 +249,33 @@ class TestFormatExcerptResults:
 class TestMain:
    """Tests for main CLI function."""
-    def test_text_and_words_input(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_text_and_words_input(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test --text and --words options."""
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            ["--text", "hello world hello", "--words", "hello", "--length", "2"]
+            exit_code = main(
-        )
+                ["--text", "hello world hello", "--words", "hello", "--length", "2"]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
-        assert "hello" in captured.out
+        assert "hello" in caplog.text
    def test_file_input(
-        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+        self, tmp_path: Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Test --file input option."""
        test_file = tmp_path / "test.txt"
        test_file.write_text("hello world hello world", encoding="utf-8")
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            ["--file", str(test_file), "--words", "hello", "--length", "2"]
+            exit_code = main(
-        )
+                ["--file", str(test_file), "--words", "hello", "--length", "2"]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
-        assert "hello" in captured.out
+        assert "hello" in caplog.text
    def test_words_file_input(
-        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+        self, tmp_path: Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Test --words-file option."""
        text_file = tmp_path / "text.txt"
@ -274,91 +283,91 @@ class TestMain:
        text_file.write_text("hello world hello world", encoding="utf-8")
        words_file.write_text("hello\nworld\n", encoding="utf-8")
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--file",
+                [
-                str(text_file),
+                    "--file",
-                "--words-file",
+                    str(text_file),
-                str(words_file),
+                    "--words-file",
-                "--length",
+                    str(words_file),
-                "2",
+                    "--length",
-            ]
+                    "2",
-        )
+                ]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
-        assert "100.00%" in captured.out  # Both words match
+        assert "100.00%" in caplog.text  # Both words match
-    def test_top_option(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_top_option(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test --top option."""
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--text",
+                [
-                "a b c d e f",
+                    "--text",
-                "--words",
+                    "a b c d e f",
-                "a",
+                    "--words",
-                "b",
+                    "a",
-                "--length",
+                    "b",
-                "2",
+                    "--length",
-                "--top",
+                    "2",
-                "3",
+                    "--top",
-            ]
+                    "3",
-        )
+                ]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
        # Should show multiple results
-        assert "Result #1" in captured.out
+        assert "Result #1" in caplog.text
-    def test_context_option(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_context_option(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test --context option."""
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--text",
+                [
-                "a b c d e f g",
+                    "--text",
-                "--words",
+                    "a b c d e f g",
-                "d",
+                    "--words",
-                "--length",
+                    "d",
-                "1",
+                    "--length",
-                "--context",
+                    "1",
-                "2",
+                    "--context",
-            ]
+                    "2",
-        )
+                ]
-        capsys.readouterr()
+            )
        assert exit_code == 0
        # Excerpt should include context words
-    def test_case_sensitive_option(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_case_sensitive_option(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test --case-sensitive option."""
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--text",
+                [
-                "Hello HELLO hello",
+                    "--text",
-                "--words",
+                    "Hello HELLO hello",
-                "hello",
+                    "--words",
-                "--length",
+                    "hello",
-                "1",
+                    "--length",
-                "--case-sensitive",
+                    "1",
-            ]
+                    "--case-sensitive",
-        )
+                ]
-        capsys.readouterr()
+            )
        assert exit_code == 0
        # Only lowercase "hello" should match
-    def test_file_not_found(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_file_not_found(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test error handling for missing file."""
-        exit_code = main(
+        with caplog.at_level(logging.ERROR):
-            ["--file", "/nonexistent/file.txt", "--words", "hello", "--length", "2"]
+            exit_code = main(
-        )
+                ["--file", "/nonexistent/file.txt", "--words", "hello", "--length", "2"]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 1
-        assert "Error" in captured.err
+        assert "Error" in caplog.text
    def test_empty_words_file(
-        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+        self, tmp_path: Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Test error when words file is empty."""
        text_file = tmp_path / "text.txt"
@ -366,20 +375,20 @@ class TestMain:
        text_file.write_text("hello world", encoding="utf-8")
        words_file.write_text("", encoding="utf-8")
-        exit_code = main(
+        with caplog.at_level(logging.ERROR):
-            [
+            exit_code = main(
-                "--file",
+                [
-                str(text_file),
+                    "--file",
-                "--words-file",
+                    str(text_file),
-                str(words_file),
+                    "--words-file",
-                "--length",
+                    str(words_file),
-                "2",
+                    "--length",
-            ]
+                    "2",
-        )
+                ]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 1
-        assert "No target words" in captured.err
+        assert "No target words" in caplog.text
 class TestPerformance:
--- a/python_pkg/word_frequency/tests/test_learning_pipe.py
+++ b/python_pkg/word_frequency/tests/test_learning_pipe.py
@ -2,16 +2,20 @@
 from __future__ import annotations
-from pathlib import Path
+import logging
 import time
 from typing import TYPE_CHECKING
 from unittest.mock import MagicMock, patch
 import pytest
 if TYPE_CHECKING:
    from pathlib import Path
 import python_pkg.word_frequency.learning_pipe as learning_pipe_module
 from python_pkg.word_frequency.learning_pipe import (
    DEFAULT_STOPWORDS_EN,
    LessonConfig,
    generate_learning_lesson,
    load_stopwords,
    main,
@ -23,7 +27,7 @@ if TYPE_CHECKING:
@pytest.fixture
-def mock_translation() -> Generator[MagicMock, None, None]:
+def _mock_translation() -> Generator[MagicMock, None, None]:
    """Mock translation to avoid requiring argostranslate."""
    def fake_batch_translate(
@ -31,7 +35,7 @@ def mock_translation() -> Generator[MagicMock, None, None]:
        from_lang: str,
        to_lang: str,
        *,
-        use_cache: bool = True,
+        _use_cache: bool = True,
    ) -> list[TranslationResult]:
        """Fake batch translation that returns word with prefix."""
        return [
@ -95,7 +99,7 @@ class TestGenerateLearningLesson:
        """Test basic lesson generation."""
        text = "hello world hello hello world test test test test"
        result = generate_learning_lesson(
-            text, batch_size=3, num_batches=1, skip_default_stopwords=True
+            text, LessonConfig(batch_size=3, num_batches=1, skip_default_stopwords=True)
        )
        assert "LANGUAGE LEARNING LESSON" in result
@ -106,7 +110,7 @@ class TestGenerateLearningLesson:
        """Test generation with multiple batches."""
        text = " ".join(f"word{i}" * (100 - i) for i in range(20))
        result = generate_learning_lesson(
-            text, batch_size=5, num_batches=3, skip_default_stopwords=True
+            text, LessonConfig(batch_size=5, num_batches=3, skip_default_stopwords=True)
        )
        assert "BATCH 1" in result
@ -116,7 +120,9 @@ class TestGenerateLearningLesson:
    def test_stopwords_filtering(self) -> None:
        """Test that default stopwords are filtered."""
        text = "the the the hello world"
-        result = generate_learning_lesson(text, batch_size=5, num_batches=1)
+        result = generate_learning_lesson(
            text, LessonConfig(batch_size=5, num_batches=1)
        )
        # "the" should be filtered, "hello" and "world" should appear
        lines = result.split("\n")
@ -139,7 +145,7 @@ class TestGenerateLearningLesson:
        """Test disabling default stopword filtering."""
        text = "the the the hello"
        result = generate_learning_lesson(
-            text, batch_size=5, num_batches=1, skip_default_stopwords=True
+            text, LessonConfig(batch_size=5, num_batches=1, skip_default_stopwords=True)
        )
        assert "the" in result.lower()
@ -148,7 +154,7 @@ class TestGenerateLearningLesson:
        """Test that numbers are filtered by default."""
        text = "123 123 123 hello world"
        result = generate_learning_lesson(
-            text, batch_size=5, num_batches=1, skip_default_stopwords=True
+            text, LessonConfig(batch_size=5, num_batches=1, skip_default_stopwords=True)
        )
        # Check vocabulary section doesn't include "123"
@ -162,10 +168,12 @@ class TestGenerateLearningLesson:
        text = "123 123 123 hello"
        result = generate_learning_lesson(
            text,
-            batch_size=5,
+            LessonConfig(
-            num_batches=1,
+                batch_size=5,
-            skip_default_stopwords=True,
+                num_batches=1,
-            skip_numbers=False,
+                skip_default_stopwords=True,
                skip_numbers=False,
            ),
        )
        assert "123" in result
@ -174,7 +182,7 @@ class TestGenerateLearningLesson:
        """Test that coverage percentage is calculated."""
        text = "hello hello hello world world test"
        result = generate_learning_lesson(
-            text, batch_size=3, num_batches=1, skip_default_stopwords=True
+            text, LessonConfig(batch_size=3, num_batches=1, skip_default_stopwords=True)
        )
        assert "recognize" in result.lower()
@ -185,11 +193,13 @@ class TestGenerateLearningLesson:
        text = "hello world hello world hello world test test test"
        result = generate_learning_lesson(
            text,
-            batch_size=2,
+            LessonConfig(
-            num_batches=1,
+                batch_size=2,
-            excerpt_length=3,
+                num_batches=1,
-            excerpts_per_batch=2,
+                excerpt_length=3,
-            skip_default_stopwords=True,
+                excerpts_per_batch=2,
                skip_default_stopwords=True,
            ),
        )
        assert "PRACTICE EXCERPTS" in result
@ -200,45 +210,45 @@ class TestMain:
    """Tests for main CLI function."""
    def test_basic_text_input(
-        self, capsys: pytest.CaptureFixture[str], mock_translation: None
+        self, caplog: pytest.LogCaptureFixture, _mock_translation: None
    ) -> None:
        """Test with text input."""
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--text",
+                [
-                "hello world hello world test test test",
+                    "--text",
-                "--batch-size",
+                    "hello world hello world test test test",
-                "3",
+                    "--batch-size",
-                "--no-default-stopwords",
+                    "3",
-            ]
+                    "--no-default-stopwords",
-        )
+                ]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
-        assert "LANGUAGE LEARNING LESSON" in captured.out
+        assert "LANGUAGE LEARNING LESSON" in caplog.text
    def test_file_input(
-        self, tmp_path: Path, capsys: pytest.CaptureFixture[str], mock_translation: None
+        self, tmp_path: Path, caplog: pytest.LogCaptureFixture, _mock_translation: None
    ) -> None:
        """Test with file input."""
        test_file = tmp_path / "test.txt"
        test_file.write_text("hello world hello world test", encoding="utf-8")
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--file",
+                [
-                str(test_file),
+                    "--file",
-                "--batch-size",
+                    str(test_file),
-                "3",
+                    "--batch-size",
-                "--no-default-stopwords",
+                    "3",
-            ]
+                    "--no-default-stopwords",
-        )
+                ]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
-        assert "hello" in captured.out.lower()
+        assert "hello" in caplog.text.lower()
-    def test_output_to_file(self, tmp_path: Path, mock_translation: None) -> None:
+    def test_output_to_file(self, tmp_path: Path, _mock_translation: None) -> None:
        """Test outputting to file."""
        output_file = tmp_path / "lesson.txt"
@ -258,7 +268,7 @@ class TestMain:
        assert "LANGUAGE LEARNING LESSON" in content
    def test_custom_stopwords(
-        self, tmp_path: Path, capsys: pytest.CaptureFixture[str], mock_translation: None
+        self, tmp_path: Path, _mock_translation: None
    ) -> None:
        """Test with custom stopwords file."""
        stopwords_file = tmp_path / "stop.txt"
@ -275,41 +285,40 @@ class TestMain:
                "5",
            ]
        )
        capsys.readouterr()
        assert exit_code == 0
        # "hello" should be filtered by custom stopwords
    def test_multiple_batches_option(
-        self, capsys: pytest.CaptureFixture[str], mock_translation: None
+        self, caplog: pytest.LogCaptureFixture, _mock_translation: None
    ) -> None:
        """Test --batches option."""
        text = " ".join(f"word{i}" * (50 - i) for i in range(30))
-        exit_code = main(
+        with caplog.at_level(logging.INFO):
-            [
+            exit_code = main(
-                "--text",
+                [
-                text,
+                    "--text",
-                "--batch-size",
+                    text,
-                "5",
+                    "--batch-size",
-                "--batches",
+                    "5",
-                "3",
+                    "--batches",
-                "--no-default-stopwords",
+                    "3",
-            ]
+                    "--no-default-stopwords",
-        )
+                ]
-        captured = capsys.readouterr()
+            )
        assert exit_code == 0
-        assert "BATCH 1" in captured.out
+        assert "BATCH 1" in caplog.text
-        assert "BATCH 2" in captured.out
+        assert "BATCH 2" in caplog.text
-        assert "BATCH 3" in captured.out
+        assert "BATCH 3" in caplog.text
-    def test_file_not_found(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_file_not_found(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test error handling for missing file."""
-        exit_code = main(["--file", "/nonexistent/file.txt"])
+        with caplog.at_level(logging.ERROR):
-        captured = capsys.readouterr()
+            exit_code = main(["--file", "/nonexistent/file.txt"])
        assert exit_code == 1
-        assert "Error" in captured.err
+        assert "Error" in caplog.text
 class TestPerformance:
@ -324,10 +333,12 @@ class TestPerformance:
        start_time = time.perf_counter()
        result = generate_learning_lesson(
            large_text,
-            batch_size=50,
+            LessonConfig(
-            num_batches=5,
+                batch_size=50,
-            excerpt_length=30,
+                num_batches=5,
-            skip_default_stopwords=True,
+                excerpt_length=30,
                skip_default_stopwords=True,
            ),
        )
        elapsed = time.perf_counter() - start_time
@ -358,9 +369,11 @@ class TestTranslationIntegration:
        text = "hello world hello world hello"
        result = generate_learning_lesson(
            text,
-            batch_size=5,
+            LessonConfig(
-            num_batches=1,
+                batch_size=5,
-            skip_default_stopwords=True,
+                num_batches=1,
                skip_default_stopwords=True,
            ),
        )
        assert "hello" in result
@ -368,17 +381,19 @@ class TestTranslationIntegration:
        # Should not have translation arrows
        assert " -> " not in result or "Translation" not in result
-    def test_lesson_with_translation_params(self, mock_translation: None) -> None:
+    def test_lesson_with_translation_params(self, _mock_translation: None) -> None:
        """Test that translation params are accepted."""
        text = "hello world hello world hello"
        # This should work with mocked translation
        result = generate_learning_lesson(
            text,
-            batch_size=5,
+            LessonConfig(
-            num_batches=1,
+                batch_size=5,
-            skip_default_stopwords=True,
+                num_batches=1,
-            translate_from="en",
+                skip_default_stopwords=True,
-            translate_to="es",
+                translate_from="en",
                translate_to="es",
            ),
        )
        # The lesson should still be generated
@ -386,7 +401,7 @@ class TestTranslationIntegration:
        assert "hello" in result
    def test_main_with_translate_flags(
-        self, tmp_path: Path, mock_translation: None
+        self, tmp_path: Path, _mock_translation: None
    ) -> None:
        """Test that main accepts translation flags."""
        text_file = tmp_path / "test.txt"
@ -408,36 +423,42 @@ class TestTranslationIntegration:
        assert result == 0
    def test_translate_to_defaults_to_english(
-        self, capsys: pytest.CaptureFixture[str], mock_translation: None
+        self, _mock_translation: None
    ) -> None:
        """Test that translate_to defaults to 'en' when using auto-detection."""
        text = "hello world"
-        # When using --translate flag (translate_from="auto"), translate_to defaults to "en"
+        # When using --translate flag (translate_from="auto"),
-        result = generate_learning_lesson(
+        # translate_to defaults to "en"
-            text,
+        with patch.object(
-            batch_size=5,
+            learning_pipe_module, "detect_language", return_value="es"
-            num_batches=1,
+        ):
-            skip_default_stopwords=True,
+            result = generate_learning_lesson(
-            translate_from="auto",  # Auto-detect source language
+                text,
-            translate_to=None,  # Should default to English
+                LessonConfig(
-        )
+                    batch_size=5,
                    num_batches=1,
                    skip_default_stopwords=True,
                    translate_from="auto",  # Auto-detect source language
                    translate_to=None,  # Should default to English
                ),
            )
        # Should have translation output with auto-detected source -> en
        assert "Detected language:" in result
        assert " -> en" in result
-    def test_no_translation_when_both_none(
+    def test_no_translation_when_both_none(self) -> None:
-        self, capsys: pytest.CaptureFixture[str]
+        """Test no translation when both translate params are None."""
    ) -> None:
        """Test no translation happens when both translate_from and translate_to are None."""
        text = "hello world"
        result = generate_learning_lesson(
            text,
-            batch_size=5,
+            LessonConfig(
-            num_batches=1,
+                batch_size=5,
-            skip_default_stopwords=True,
+                num_batches=1,
-            translate_from=None,
+                skip_default_stopwords=True,
-            translate_to=None,
+                translate_from=None,
                translate_to=None,
            ),
        )
        # Should not have translation output
--- a/python_pkg/word_frequency/tests/test_translator.py
+++ b/python_pkg/word_frequency/tests/test_translator.py
@ -61,19 +61,16 @@ class ArgosAvailableMock:
        self.mock_translate_module = MagicMock()
        self.mock_package_module = MagicMock()
        self.mock_parent = MagicMock()
        self.original_available = translator._argos_available
        self._sys_modules_patcher: MagicMock | None = None
        self._ensure_patcher: MagicMock | None = None
        self._lang_patcher: MagicMock | None = None
        self._check_argos_patcher: MagicMock | None = None
        self._argos_module_patcher: MagicMock | None = None
    def __enter__(self) -> MagicMock:
        """Set up the mocks."""
        translator._argos_available = True
        # Set up translate return value
-        if isinstance(self.translate_returns, Exception) or isinstance(
+        if isinstance(self.translate_returns, (Exception, list)):
            self.translate_returns, list
        ):
            self.mock_translate_fn.side_effect = self.translate_returns
        elif self.translate_returns is not None:
            self.mock_translate_fn.return_value = self.translate_returns
@ -96,41 +93,52 @@ class ArgosAvailableMock:
            },
        )
        # Patch the module-level argostranslate reference in translator
        self._argos_module_patcher = patch.object(
            translator, "argostranslate", self.mock_parent, create=True
        )
        # Patch _ensure_argos_installed and _ensure_language_pair to no-op
        self._ensure_patcher = patch.object(
            translator, "_ensure_argos_installed", lambda: None
        )
        self._lang_patcher = patch.object(
-            translator, "_ensure_language_pair", lambda f, t: None
+            translator, "_ensure_language_pair", lambda _f, _t: None
        )
        self._check_argos_patcher = patch.object(
            translator, "_check_argos", return_value=True
        )
        self._sys_modules_patcher.start()  # type: ignore[union-attr]
        self._argos_module_patcher.start()  # type: ignore[union-attr]
        self._ensure_patcher.start()  # type: ignore[union-attr]
        self._lang_patcher.start()  # type: ignore[union-attr]
        self._check_argos_patcher.start()  # type: ignore[union-attr]
        return self.mock_translate_fn
    def __exit__(self, *args: object) -> None:
        """Restore original state."""
        if self._check_argos_patcher:
            self._check_argos_patcher.stop()
        if self._lang_patcher:
            self._lang_patcher.stop()
        if self._ensure_patcher:
            self._ensure_patcher.stop()
        if self._argos_module_patcher:
            self._argos_module_patcher.stop()
        if self._sys_modules_patcher:
            self._sys_modules_patcher.stop()
        translator._argos_available = self.original_available
 # Fixtures
@pytest.fixture
-def mock_argos_unavailable() -> Generator[None, None, None]:
+def _mock_argos_unavailable() -> Generator[None, None, None]:
    """Mock argostranslate being unavailable (for legacy tests)."""
-    original_value = translator._argos_available
+    with patch.object(translator, "_check_argos", return_value=False):
-    translator._argos_available = False
+        yield
    yield
    translator._argos_available = original_value
@pytest.fixture
@ -178,7 +186,7 @@ class TestTranslationResult:
    def test_result_is_tuple(self) -> None:
        """Test that TranslationResult is a namedtuple."""
-        result = TranslationResult("a", "b", "en", "es", True)
+        result = TranslationResult("a", "b", "en", "es", success=True)
        assert isinstance(result, tuple)
        assert len(result) == 6
@ -192,13 +200,15 @@ class TestTranslateWord:
    def test_translate_word_argos_unavailable_raises(self) -> None:
        """Test that translation raises ImportError when argos is unavailable."""
        # Mock _ensure_argos_installed to raise ImportError
-        with patch.object(
+        with (
-            translator,
+            patch.object(
-            "_ensure_argos_installed",
+                translator,
-            side_effect=ImportError("argostranslate not available"),
+                "_ensure_argos_installed",
                side_effect=ImportError("argostranslate not available"),
            ),
            pytest.raises(ImportError, match="argostranslate not available"),
        ):
-            with pytest.raises(ImportError, match="argostranslate not available"):
+            translate_word("hello", "en", "es", use_cache=False)
                translate_word("hello", "en", "es", use_cache=False)
    def test_translate_word_success(self) -> None:
        """Test successful word translation."""
@ -243,13 +253,15 @@ class TestTranslateWords:
    def test_translate_words_argos_unavailable_raises(self) -> None:
        """Test that translating words raises ImportError when argos unavailable."""
-        with patch.object(
+        with (
-            translator,
+            patch.object(
-            "_ensure_argos_installed",
+                translator,
-            side_effect=ImportError("argostranslate not available"),
+                "_ensure_argos_installed",
                side_effect=ImportError("argostranslate not available"),
            ),
            pytest.raises(ImportError, match="argostranslate not available"),
        ):
-            with pytest.raises(ImportError, match="argostranslate not available"):
+            translate_words(["hello", "world"], "en", "es", use_cache=False)
                translate_words(["hello", "world"], "en", "es", use_cache=False)
 # translate_words_batch tests
@ -290,7 +302,7 @@ class TestTranslateWordsBatch:
        assert results[4].translated_word == "cinco"
    def test_batch_fallback_on_mismatch(self) -> None:
-        """Test batch translation falls back to individual when result count mismatches."""
+        """Test batch falls back to individual on result count mismatch."""
        words = ["one", "two", "three", "four"]
        # First call (batch) returns wrong count, subsequent calls are individual
        with ArgosAvailableMock(["wrong", "uno", "dos", "tres", "cuatro"]) as mock:
@ -313,10 +325,11 @@ class TestTranslateWordsBatch:
        mock_parent.translate = mock_translate_module
        mock_parent.package = mock_package_module
        original = translator._argos_available
        translator._argos_available = True
        with (
            patch.object(translator, "_check_argos", return_value=True),
            patch.object(
                translator, "argostranslate", mock_parent, create=True
            ),
            patch.dict(
                "sys.modules",
                {
@ -326,22 +339,22 @@ class TestTranslateWordsBatch:
                },
            ),
            patch.object(translator, "_ensure_argos_installed", lambda: None),
-            patch.object(translator, "_ensure_language_pair", lambda f, t: None),
+            patch.object(translator, "_ensure_language_pair", lambda _f, _t: None),
            pytest.raises(RuntimeError, match="Translation failed"),
        ):
            translate_words_batch(words, "en", "es", use_cache=False)
        translator._argos_available = original
    def test_batch_argos_unavailable_raises(self) -> None:
        """Test that batch translation raises ImportError when argos unavailable."""
-        with patch.object(
+        with (
-            translator,
+            patch.object(
-            "_ensure_argos_installed",
+                translator,
-            side_effect=ImportError("argostranslate not available"),
+                "_ensure_argos_installed",
                side_effect=ImportError("argostranslate not available"),
            ),
            pytest.raises(ImportError, match="argostranslate not available"),
        ):
-            with pytest.raises(ImportError, match="argostranslate not available"):
+            translate_words_batch(["hello", "world"], "en", "es", use_cache=False)
                translate_words_batch(["hello", "world"], "en", "es", use_cache=False)
 # format_translations tests
@ -358,7 +371,7 @@ class TestFormatTranslations:
    def test_format_single_translation(self) -> None:
        """Test formatting single translation."""
        results = [
-            TranslationResult("hello", "hola", "en", "es", True),
+            TranslationResult("hello", "hola", "en", "es", success=True),
        ]
        output = format_translations(results)
@ -369,8 +382,8 @@ class TestFormatTranslations:
    def test_format_multiple_translations(self) -> None:
        """Test formatting multiple translations."""
        results = [
-            TranslationResult("hello", "hola", "en", "es", True),
+            TranslationResult("hello", "hola", "en", "es", success=True),
-            TranslationResult("world", "mundo", "en", "es", True),
+            TranslationResult("world", "mundo", "en", "es", success=True),
        ]
        output = format_translations(results)
@ -382,8 +395,10 @@ class TestFormatTranslations:
    def test_format_with_errors(self) -> None:
        """Test formatting with failed translations."""
        results = [
-            TranslationResult("hello", "hola", "en", "es", True),
+            TranslationResult("hello", "hola", "en", "es", success=True),
-            TranslationResult("xyz", "", "en", "es", False, "Unknown word"),
+            TranslationResult(
                "xyz", "", "en", "es", success=False, error="Unknown word"
            ),
        ]
        output = format_translations(results, show_errors=True)
@ -393,8 +408,10 @@ class TestFormatTranslations:
    def test_format_hide_errors(self) -> None:
        """Test formatting with errors hidden."""
        results = [
-            TranslationResult("hello", "hola", "en", "es", True),
+            TranslationResult("hello", "hola", "en", "es", success=True),
-            TranslationResult("xyz", "", "en", "es", False, "Unknown word"),
+            TranslationResult(
                "xyz", "", "en", "es", success=False, error="Unknown word"
            ),
        ]
        output = format_translations(results, show_errors=False)
@ -408,7 +425,7 @@ class TestFormatTranslations:
 class TestGetInstalledLanguages:
    """Tests for get_installed_languages function."""
-    def test_argos_unavailable(self, mock_argos_unavailable: None) -> None:
+    def test_argos_unavailable(self, _mock_argos_unavailable: None) -> None:
        """Test when argos is unavailable."""
        result = get_installed_languages()
        assert result == []
@ -433,21 +450,22 @@ class TestGetInstalledLanguages:
        mock_parent.translate = mock_translate_module
        mock_parent.package = mock_package_module
-        original = translator._argos_available
+        with (
-        translator._argos_available = True
+            patch.object(translator, "_check_argos", return_value=True),
-
+            patch.object(
-        with patch.dict(
+                translator, "argostranslate", mock_parent, create=True
-            "sys.modules",
+            ),
-            {
+            patch.dict(
-                "argostranslate": mock_parent,
+                "sys.modules",
-                "argostranslate.translate": mock_translate_module,
+                {
-                "argostranslate.package": mock_package_module,
+                    "argostranslate": mock_parent,
-            },
+                    "argostranslate.translate": mock_translate_module,
                    "argostranslate.package": mock_package_module,
                },
            ),
        ):
            result = get_installed_languages()
        translator._argos_available = original
        assert ("en", "English") in result
        assert ("es", "Spanish") in result
@ -458,7 +476,7 @@ class TestGetInstalledLanguages:
 class TestGetAvailablePackages:
    """Tests for get_available_packages function."""
-    def test_argos_unavailable(self, mock_argos_unavailable: None) -> None:
+    def test_argos_unavailable(self, _mock_argos_unavailable: None) -> None:
        """Test when argos is unavailable."""
        result = get_available_packages()
        assert result == []
@ -470,7 +488,7 @@ class TestGetAvailablePackages:
 class TestDownloadLanguages:
    """Tests for download_languages function."""
-    def test_argos_unavailable(self, mock_argos_unavailable: None) -> None:
+    def test_argos_unavailable(self, _mock_argos_unavailable: None) -> None:
        """Test when argos is unavailable."""
        result = download_languages(["en", "es"])
        assert result == {}
@ -503,7 +521,7 @@ class TestReadFile:
 class TestMain:
    """Tests for main CLI function."""
-    def test_argos_unavailable_error(self, mock_argos_unavailable: None) -> None:
+    def test_argos_unavailable_error(self, _mock_argos_unavailable: None) -> None:
        """Test error when argos not installed."""
        result = main(["--text", "hello", "--from", "en", "--to", "es"])
        assert result == 1
@ -517,21 +535,22 @@ class TestMain:
        mock_parent.translate = mock_translate_module
        mock_parent.package = mock_package_module
-        original = translator._argos_available
+        with (
-        translator._argos_available = True
+            patch.object(translator, "_check_argos", return_value=True),
-
+            patch.object(
-        with patch.dict(
+                translator, "argostranslate", mock_parent, create=True
-            "sys.modules",
+            ),
-            {
+            patch.dict(
-                "argostranslate": mock_parent,
+                "sys.modules",
-                "argostranslate.translate": mock_translate_module,
+                {
-                "argostranslate.package": mock_package_module,
+                    "argostranslate": mock_parent,
-            },
+                    "argostranslate.translate": mock_translate_module,
                    "argostranslate.package": mock_package_module,
                },
            ),
        ):
            result = main(["--list-languages"])
        translator._argos_available = original
        assert result == 0
        captured = capsys.readouterr()
        assert "No languages installed" in captured.out
@ -551,21 +570,22 @@ class TestMain:
        mock_parent.translate = mock_translate_module
        mock_parent.package = mock_package_module
-        original = translator._argos_available
+        with (
-        translator._argos_available = True
+            patch.object(translator, "_check_argos", return_value=True),
-
+            patch.object(
-        with patch.dict(
+                translator, "argostranslate", mock_parent, create=True
-            "sys.modules",
+            ),
-            {
+            patch.dict(
-                "argostranslate": mock_parent,
+                "sys.modules",
-                "argostranslate.translate": mock_translate_module,
+                {
-                "argostranslate.package": mock_package_module,
+                    "argostranslate": mock_parent,
-            },
+                    "argostranslate.translate": mock_translate_module,
                    "argostranslate.package": mock_package_module,
                },
            ),
        ):
            result = main(["--list-languages"])
        translator._argos_available = original
        assert result == 0
        captured = capsys.readouterr()
        assert "en" in captured.out
@ -622,7 +642,6 @@ class TestMain:
    def test_translate_output_to_file(
        self,
        tmp_path: Path,
        capsys: pytest.CaptureFixture[str],
    ) -> None:
        """Test outputting translations to file."""
        output_file = tmp_path / "output.txt"
@ -647,7 +666,9 @@ class TestMain:
        assert "hello" in content
        assert "hola" in content
-    def test_no_input_shows_help(self, capsys: pytest.CaptureFixture[str]) -> None:
+    def test_no_input_shows_help(
        self,
    ) -> None:
        """Test that no input shows help."""
        with ArgosAvailableMock():
            result = main([])
--- a/python_pkg/word_frequency/tests/test_vocabulary_curve.py
+++ b/python_pkg/word_frequency/tests/test_vocabulary_curve.py
@ -89,7 +89,7 @@ class TestExcerptValidity:
    """Tests that verify excerpts are actually found in the source text."""
    def test_excerpt_exists_in_source_text(self, sample_text_file: Path) -> None:
-        """Test that each excerpt can be found in the source text as contiguous words."""
+        """Test that each excerpt can be found in source text."""
        import re
        source_text = sample_text_file.read_text(encoding="utf-8").lower()
--- a/python_pkg/word_frequency/translator.py
+++ b/python_pkg/word_frequency/translator.py
@ -1,149 +1,163 @@
 #!/usr/bin/env python3
-"""Translator - translates words/text between languages.
+r"""Translator - translates words/text between languages.
 This module provides translation capabilities using either:
 1. Argos Translate (offline, requires large downloads) - preferred if installed
 2. deep-translator (online, uses Google Translate) - lightweight fallback
-Usage:
+1. Argos Translate (offline, requires large downloads)
 2. deep-translator (online, uses Google Translate)
 Usage::
    # Translate a single word
-    python -m python_pkg.word_frequency.translator --text "hello" --from en --to es
+    python -m python_pkg.word_frequency.translator \\
        --text "hello" --from en --to es
    # Translate multiple words
-    python -m python_pkg.word_frequency.translator --words hello world goodbye --from en --to pl
+    python -m python_pkg.word_frequency.translator \\
        --words hello world goodbye --from en --to pl
    # Translate words from a file (one word per line)
-    python -m python_pkg.word_frequency.translator --words-file words.txt --from la --to en
+    python -m python_pkg.word_frequency.translator \\
        --words-file words.txt --from la --to en
    # List available languages
-    python -m python_pkg.word_frequency.translator --list-languages
+    python -m python_pkg.word_frequency.translator \\
        --list-languages
    # Output to file
-    python -m python_pkg.word_frequency.translator --words-file vocab.txt --from pl --to en --output translations.txt
+    python -m python_pkg.word_frequency.translator \\
        --words-file vocab.txt --from pl --to en \\
        --output translations.txt
-Dependencies (install one):
+Dependencies (install one)::
-    pip install deep-translator    # Lightweight, uses Google Translate (online)
+
-    pip install argostranslate     # Offline translation (requires ~3GB downloads)
+    pip install deep-translator
    pip install argostranslate
 """
 from __future__ import annotations
 import argparse
 import importlib
 import logging
 import os
 from pathlib import Path
 import subprocess
 import sys
 from typing import TYPE_CHECKING, NamedTuple
 if TYPE_CHECKING:
    from collections.abc import Sequence
-# Lazy imports for translation backends (may not be installed)
+try:
-_argos_available: bool | None = None
+    import torch
-_deep_translator_available: bool | None = None
+except ImportError:
-_langdetect_available: bool | None = None
+    torch = None  # type: ignore[assignment]
-_gpu_initialized: bool = False
+
-_gpu_available: bool | None = None
+try:
    import argostranslate.package
    import argostranslate.translate
 except ImportError:
    argostranslate = None  # type: ignore[assignment]
 try:
    from deep_translator import GoogleTranslator
 except ImportError:
    GoogleTranslator = None
 try:
    import langdetect
 except ImportError:
    langdetect = None  # type: ignore[assignment]
 try:
    from python_pkg.word_frequency.cache import (
        get_translation_cache,
    )
 except ImportError:
    get_translation_cache = None
 logger = logging.getLogger(__name__)
 _LANG_DETECT_SAMPLE_SIZE = 5000
 _BATCH_SIZE = 100
 class _TranslatorState:
    """Holds module-level state for lazy-initialized backends.
    The class is used as a plain namespace: its attribute is read and
    written directly on the class (``_TranslatorState.gpu_initialized``)
    rather than on instances, which replaces the former ``global`` flag.
    """
    # True once _init_gpu_if_available() has finished its one-time check,
    # whether or not CUDA turned out to be usable; later calls return early.
    gpu_initialized: bool = False
 def _check_cuda_available() -> bool:
    """Check if CUDA is available for GPU acceleration."""
-    global _gpu_available
+    return torch is not None and torch.cuda.is_available()
    if _gpu_available is None:
        try:
            import torch
-            _gpu_available = torch.cuda.is_available()
+
-        except ImportError:
+def _validate_gpu_device() -> str:
-            _gpu_available = False
+    """Validate GPU device availability and return device name.
-    return _gpu_available
+
    Raises:
        RuntimeError: If no GPU devices are found.
    """
    device_count = torch.cuda.device_count()
    if device_count == 0:
        msg = "CUDA reports available but no GPU devices found"
        raise RuntimeError(msg)
    return torch.cuda.get_device_name(0)
 def _init_gpu_if_available() -> None:
    """Initialize GPU for argostranslate if CUDA is available.
    Raises:
-        RuntimeError: If CUDA is available but GPU initialization fails.
+        RuntimeError: If CUDA is available but GPU init fails.
    """
-    global _gpu_initialized
+    if _TranslatorState.gpu_initialized:
    if _gpu_initialized:
        return
    if not _check_cuda_available():
-        _gpu_initialized = True
+        _TranslatorState.gpu_initialized = True
        return
-    import sys
+    logger.info(
-
+        "CUDA detected, initializing GPU acceleration..."
-    print("CUDA detected, initializing GPU acceleration...", file=sys.stderr)
+    )
    try:
-        import torch
+        device_name = _validate_gpu_device()
-
+        logger.info("  Using GPU: %s", device_name)
        # Force CTranslate2 to use CUDA
        device_count = torch.cuda.device_count()
        if device_count == 0:
            raise RuntimeError("CUDA reports available but no GPU devices found")
        device_name = torch.cuda.get_device_name(0)
        print(f"  Using GPU: {device_name}", file=sys.stderr)
        # Set environment variable to force GPU usage in argos
        import os
        os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
        os.environ["CT2_USE_EXPERIMENTAL_PACKED_GEMM"] = "1"
-        _gpu_initialized = True
+        _TranslatorState.gpu_initialized = True
-        print("  GPU acceleration enabled.", file=sys.stderr)
+        logger.info("  GPU acceleration enabled.")
    except Exception as e:
-        raise RuntimeError(
+        msg = (
-            f"CUDA is available but GPU initialization failed: {e}\n"
+            f"CUDA is available but GPU initialization failed: "
-            f"This may be due to incompatible CUDA version or driver issues.\n"
+            f"{e}\nThis may be due to incompatible CUDA "
-            f"To disable GPU and use CPU only, set environment variable: CT2_FORCE_CPU=1"
+            "version or driver issues.\n"
-        ) from e
+            "To disable GPU and use CPU only, set "
            "environment variable: CT2_FORCE_CPU=1"
        )
        raise RuntimeError(msg) from e
 def _check_argos() -> bool:
    """Check if argostranslate is available."""
-    global _argos_available
+    return argostranslate is not None
    if _argos_available is None:
        try:
            import argostranslate.package
            import argostranslate.translate
            _ = (argostranslate.package, argostranslate.translate)
            _argos_available = True
        except ImportError:
            _argos_available = False
    return _argos_available
 def _check_deep_translator() -> bool:
    """Check if deep-translator is available."""
-    global _deep_translator_available
+    return GoogleTranslator is not None
    if _deep_translator_available is None:
        try:
            from deep_translator import GoogleTranslator
            _ = GoogleTranslator
            _deep_translator_available = True
        except ImportError:
            _deep_translator_available = False
    return _deep_translator_available
 def _check_langdetect() -> bool:
    """Check if langdetect is available."""
-    global _langdetect_available
+    return langdetect is not None
    if _langdetect_available is None:
        try:
            import langdetect
            _ = langdetect
            _langdetect_available = True
        except ImportError:
            _langdetect_available = False
    return _langdetect_available
 def detect_language(text: str) -> str | None:
@ -158,13 +172,14 @@ def detect_language(text: str) -> str | None:
    if not _check_langdetect():
        return None
    import langdetect
    try:
-        # Use a sample of the text for detection (faster and more reliable)
+        sample = (
-        sample = text[:5000] if len(text) > 5000 else text
+            text[:_LANG_DETECT_SAMPLE_SIZE]
-        return langdetect.detect(sample)  # type: ignore[no-any-return]
+            if len(text) > _LANG_DETECT_SAMPLE_SIZE
-    except langdetect.LangDetectException:  # type: ignore[attr-defined]
+            else text
        )
        return langdetect.detect(sample)  # type: ignore[no-any-return,union-attr]
    except langdetect.LangDetectException:  # type: ignore[attr-defined,union-attr]
        return None
@ -188,8 +203,6 @@ def get_installed_languages() -> list[tuple[str, str]]:
    if not _check_argos():
        return []
    import argostranslate.translate
    languages = argostranslate.translate.get_installed_languages()
    return [(lang.code, lang.name) for lang in languages]
@ -203,8 +216,6 @@ def get_available_packages() -> list[tuple[str, str, str, str]]:
    if not _check_argos():
        return []
    import argostranslate.package
    argostranslate.package.update_package_index()
    available = argostranslate.package.get_available_packages()
    return [
@ -227,12 +238,10 @@ def download_languages(lang_codes: Sequence[str]) -> dict[str, bool]:
    if not _check_argos():
        return {}
    import argostranslate.package
    results: dict[str, bool] = {}
    # Update package index
-    print("Updating package index...")
+    logger.info("Updating package index...")
    argostranslate.package.update_package_index()
    available = argostranslate.package.get_available_packages()
@ -255,13 +264,26 @@ def download_languages(lang_codes: Sequence[str]) -> dict[str, bool]:
            if pkg_key in available_lookup:
                pkg = available_lookup[pkg_key]
                try:
-                    print(f"Downloading {from_code} -> {to_code}...")
+                    logger.info(
                        "Downloading %s -> %s...",
                        from_code,
                        to_code,
                    )
                    argostranslate.package.install_from_path(pkg.download())
                    results[key] = True
-                    print(f"  ✓ Installed {from_code} -> {to_code}")
+                    logger.info(
-                except Exception as e:  # noqa: BLE001
+                        "  Installed %s -> %s",
                        from_code,
                        to_code,
                    )
                except (OSError, RuntimeError, ValueError) as e:
                    results[key] = False
-                    print(f"  ✗ Failed {from_code} -> {to_code}: {e}")
+                    logger.info(
                        "  Failed %s -> %s: %s",
                        from_code,
                        to_code,
                        e,
                    )
            else:
                # Package not available
                results[key] = False
@ -278,32 +300,38 @@ def _ensure_argos_installed() -> None:
    if _check_argos():
        return
-    import subprocess
+    logger.info("argostranslate not found. Attempting to install...")
    import sys
    print("argostranslate not found. Attempting to install...")
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "argostranslate"],
            check=True,
            capture_output=True,
        )
-        # Reset the check flag and verify
+        # Attempt runtime re-import
-        global _argos_available
+        importlib.import_module("argostranslate.package")
-        _argos_available = None
+        importlib.import_module("argostranslate.translate")
-        if not _check_argos():
+        logger.info("argostranslate installed successfully.")
            raise ImportError("argostranslate installation succeeded but import failed")
        print("argostranslate installed successfully.")
    except subprocess.CalledProcessError as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
-        raise ImportError(
+        msg = (
-            f"argostranslate is required for offline translation.\n\n"
+            "argostranslate is required for offline "
-            f"Install manually with one of:\n"
+            "translation.\n\n"
-            f"  pip install argostranslate          # In a virtualenv\n"
+            "Install manually with one of:\n"
-            f"  pipx install argostranslate         # System-wide via pipx\n"
+            "  pip install argostranslate"
-            f"  pacman -S python-argostranslate     # Arch Linux (if available)\n\n"
+            "          # In a virtualenv\n"
            "  pipx install argostranslate"
            "         # System-wide via pipx\n"
            "  pacman -S python-argostranslate"
            "     # Arch Linux (if available)\n\n"
            f"Original error: {error_msg}"
-        ) from e
+        )
        raise ImportError(msg) from e
    except ImportError:
        msg = (
            "argostranslate installation succeeded but "
            "import failed"
        )
        raise ImportError(msg) from None
 def _ensure_language_pair(from_lang: str, to_lang: str) -> None:
@ -316,11 +344,9 @@ def _ensure_language_pair(from_lang: str, to_lang: str) -> None:
    Raises:
        ValueError: If language pair cannot be obtained.
    """
-    import argostranslate.package
+    installed_languages = (
-    import argostranslate.translate
+        argostranslate.translate.get_installed_languages()
-
+    )
    # Check if already installed
    installed_languages = argostranslate.translate.get_installed_languages()
    from_lang_obj = None
    to_lang_obj = None
@ -337,37 +363,44 @@ def _ensure_language_pair(from_lang: str, to_lang: str) -> None:
            return  # Already available
    # Need to download
-    import sys
+    logger.info(
-
+        "Downloading language pack: %s -> %s...",
-    print(
+        from_lang,
-        f"Downloading language pack: {from_lang} -> {to_lang}...",
+        to_lang,
        file=sys.stderr,
    )
-    print("  Fetching package index...", file=sys.stderr)
+    logger.info("  Fetching package index...")
    argostranslate.package.update_package_index()
    available = argostranslate.package.get_available_packages()
    pkg = next(
-        (p for p in available if p.from_code == from_lang and p.to_code == to_lang),
+        (
            p
            for p in available
            if p.from_code == from_lang and p.to_code == to_lang
        ),
        None,
    )
    if pkg is None:
-        raise ValueError(
+        msg = (
-            f"No language pack available for {from_lang} -> {to_lang}. "
+            f"No language pack available for "
-            f"Available pairs can be listed with --list-languages."
+            f"{from_lang} -> {to_lang}. "
            "Available pairs can be listed with "
            "--list-languages."
        )
        raise ValueError(msg)
-    print(
+    logger.info(
-        "  Downloading package (~50-100MB, this may take a minute)...",
+        "  Downloading package (~50-100MB, "
-        file=sys.stderr,
+        "this may take a minute)...",
    )
    download_path = pkg.download()
-    print("  Installing language pack...", file=sys.stderr)
+    logger.info("  Installing language pack...")
    argostranslate.package.install_from_path(download_path)
-    print(
+    logger.info(
-        f"Language pack {from_lang} -> {to_lang} installed.",
+        "Language pack %s -> %s installed.",
-        file=sys.stderr,
+        from_lang,
        to_lang,
    )
@ -393,38 +426,30 @@ def translate_word(
        ImportError: If argostranslate is not available and cannot be installed.
    """
    # Check cache first
-    if use_cache:
+    if use_cache and get_translation_cache is not None:
-        try:
+        cache = get_translation_cache()
-            from python_pkg.word_frequency.cache import get_translation_cache
+        cached = cache.get(word, from_lang, to_lang)
-
+        if cached is not None:
-            cache = get_translation_cache()
+            return TranslationResult(
-            cached = cache.get(word, from_lang, to_lang)
+                source_word=word,
-            if cached is not None:
+                translated_word=cached,
-                return TranslationResult(
+                source_lang=from_lang,
-                    source_word=word,
+                target_lang=to_lang,
-                    translated_word=cached,
+                success=True,
-                    source_lang=from_lang,
+            )
                    target_lang=to_lang,
                    success=True,
                )
        except ImportError:
            pass  # Cache not available
    # Ensure argos is installed (will raise if it can't be)
    _ensure_argos_installed()
    import argostranslate.translate
    try:
-        translated = argostranslate.translate.translate(word, from_lang, to_lang)
+        translated = argostranslate.translate.translate(
            word, from_lang, to_lang,
        )
        # Cache the result
-        if use_cache:
+        if use_cache and get_translation_cache is not None:
-            try:
+            get_translation_cache().set(
-                from python_pkg.word_frequency.cache import get_translation_cache
+                word, from_lang, to_lang, translated,
-
+            )
                get_translation_cache().set(word, from_lang, to_lang, translated)
            except ImportError:
                pass
        return TranslationResult(
            source_word=word,
            translated_word=translated,
@ -432,7 +457,7 @@ def translate_word(
            target_lang=to_lang,
            success=True,
        )
-    except Exception as e:  # noqa: BLE001
+    except (OSError, RuntimeError, ValueError, TypeError) as e:
        return TranslationResult(
            source_word=word,
            translated_word="",
@ -483,8 +508,6 @@ def _translate_batch_worker(
    Returns:
        Tuple of (batch_idx, translations dict).
    """
    import argostranslate.translate
    translations: dict[str, str] = {}
    # Batch translate by joining with newlines
@ -507,6 +530,78 @@ def _translate_batch_worker(
    return batch_idx, translations
 def _run_batch_translation(
    words_to_translate: list[str],
    from_lang: str,
    to_lang: str,
 ) -> dict[str, str]:
    """Translate words chunk by chunk, logging progress for each chunk.
    The input is cut into consecutive slices of at most ``_BATCH_SIZE``
    words; every slice is handed to ``_translate_batch_worker`` and the
    per-slice results are merged into a single mapping.
    Args:
        words_to_translate: Words needing translation.
        from_lang: Source language code.
        to_lang: Target language code.
    Returns:
        Merged dict of the translations returned by the batch worker.
    Raises:
        RuntimeError: If any step of the batch translation fails; the
            original exception is attached as the cause.
    """
    total_words = len(words_to_translate)
    device_label = " (GPU)" if _check_cuda_available() else " (CPU)"
    logger.info(
        "Translating %d words from %s to %s%s...",
        total_words,
        from_lang,
        to_lang,
        device_label,
    )
    collected: dict[str, str] = {}
    try:
        chunks = [
            words_to_translate[start : start + _BATCH_SIZE]
            for start in range(0, total_words, _BATCH_SIZE)
        ]
        chunk_count = len(chunks)
        # Count chunks from 1 for display; the worker still gets a 0-based index.
        for position, chunk in enumerate(chunks, start=1):
            # Progress is reported as of the end of the chunk about to run,
            # capped so the final (possibly short) chunk never overshoots.
            processed = min(position * _BATCH_SIZE, total_words)
            logger.info(
                "  [%3d%%] Translating batch %d/%d (%d/%d words)...",
                int(processed / total_words * 100),
                position,
                chunk_count,
                processed,
                total_words,
            )
            _, chunk_translations = _translate_batch_worker(
                chunk, from_lang, to_lang, position - 1,
            )
            collected.update(chunk_translations)
        logger.info("  Translation complete.")
    except Exception as exc:
        # Surface every failure as one RuntimeError, keeping the cause chained.
        msg = f"Translation failed for {from_lang} -> {to_lang}: {exc}"
        raise RuntimeError(msg) from exc
    return collected
 def translate_words_batch(
    words: Sequence[str],
    from_lang: str,
@ -535,90 +630,36 @@ def translate_words_batch(
    if not words:
        return []
    # Ensure argos is installed (will raise if it can't be)
    _ensure_argos_installed()
    # Initialize GPU if available (will raise if CUDA available but fails)
    _init_gpu_if_available()
    # Ensure language pair is available
    _ensure_language_pair(from_lang, to_lang)
    # Check cache for already-translated words
    cached_results: dict[str, str] = {}
-    words_to_translate: list[str] = []
+    if use_cache and get_translation_cache is not None:
-
+        cache = get_translation_cache()
-    if use_cache:
+        cached_results = cache.get_many(
-        try:
+            list(words), from_lang, to_lang,
-            from python_pkg.word_frequency.cache import get_translation_cache
+        )
            cache = get_translation_cache()
            cached_results = cache.get_many(list(words), from_lang, to_lang)
        except ImportError:
            pass
    # Find words that still need translation
-    for word in words:
+    words_to_translate = [
-        if word.lower() not in cached_results:
+        word for word in words
-            words_to_translate.append(word)
+        if word.lower() not in cached_results
    ]
    # Translate uncached words using argos batch
    new_translations: dict[str, str] = {}
    if words_to_translate:
-        import sys
+        new_translations = _run_batch_translation(
-
+            words_to_translate, from_lang, to_lang,
        num_to_translate = len(words_to_translate)
        # Check if GPU is being used
        gpu_status = " (GPU)" if _gpu_available else " (CPU)"
        print(
            f"Translating {num_to_translate} words from {from_lang} to {to_lang}{gpu_status}...",
            file=sys.stderr,
            flush=True,
        )
        try:
            # Split into batches - larger batches are faster but show progress less often
            BATCH_SIZE = 100
            batches: list[list[str]] = []
            for i in range(0, num_to_translate, BATCH_SIZE):
                batches.append(words_to_translate[i : i + BATCH_SIZE])
            total_batches = len(batches)
            # Sequential translation with progress
            # (argostranslate is not thread-safe - uses global model)
            for batch_idx, batch_words in enumerate(batches):
                words_done = (batch_idx + 1) * BATCH_SIZE
                words_done = min(words_done, num_to_translate)
                pct = int(words_done / num_to_translate * 100)
                print(
                    f"  [{pct:3d}%] Translating batch {batch_idx + 1}/{total_batches} "
                    f"({words_done}/{num_to_translate} words)...",
                    file=sys.stderr,
                    flush=True,
                )
                _, batch_translations = _translate_batch_worker(
                    batch_words, from_lang, to_lang, batch_idx
                )
                new_translations.update(batch_translations)
            print("  Translation complete.", file=sys.stderr, flush=True)
        except Exception as e:
            raise RuntimeError(
                f"Translation failed for {from_lang} -> {to_lang}: {e}"
            ) from e
        # Cache new translations
-        if use_cache and new_translations:
+        if use_cache and get_translation_cache is not None:
-            try:
+            get_translation_cache().set_many(
-                from python_pkg.word_frequency.cache import get_translation_cache
+                new_translations, from_lang, to_lang,
-
+            )
                get_translation_cache().set_many(new_translations, from_lang, to_lang)
            except ImportError:
                pass
    # Merge cached and new translations
    all_translations = {**cached_results, **new_translations}
@ -694,22 +735,14 @@ def read_file(filepath: str | Path) -> str:
    return Path(filepath).read_text(encoding="utf-8")
-def main(argv: Sequence[str] | None = None) -> int:
+def _build_parser() -> argparse.ArgumentParser:
-    """Main entry point for the translator.
+    """Build the argument parser for the translator CLI."""
    Args:
        argv: Command line arguments.
    Returns:
        Exit code.
    """
    parser = argparse.ArgumentParser(
        description="Offline translator using Argos Translate.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # Actions
    action_group = parser.add_mutually_exclusive_group()
    action_group.add_argument(
        "--list-languages",
@ -728,10 +761,12 @@ def main(argv: Sequence[str] | None = None) -> int:
        "-d",
        nargs="+",
        metavar="LANG",
-        help="Download language packs (e.g., --download en es pl)",
+        help=(
            "Download language packs "
            "(e.g., --download en es pl)"
        ),
    )
    # Input
    input_group = parser.add_mutually_exclusive_group()
    input_group.add_argument(
        "--text",
@ -752,7 +787,6 @@ def main(argv: Sequence[str] | None = None) -> int:
        help="File with words to translate (one per line)",
    )
    # Language options
    parser.add_argument(
        "--from",
        "-f",
@ -769,8 +803,6 @@ def main(argv: Sequence[str] | None = None) -> int:
        default="en",
        help="Target language code (default: en)",
    )
    # Output
    parser.add_argument(
        "--output",
        "-o",
@ -778,87 +810,142 @@ def main(argv: Sequence[str] | None = None) -> int:
        help="Output file path",
    )
-    args = parser.parse_args(argv)
+    return parser
-    # Check if argostranslate is available
+
-    if not _check_argos():
+def _handle_list_languages() -> int:
-        print(
+    """Handle --list-languages command."""
-            "Error: argostranslate is not installed.\n"
+    langs = get_installed_languages()
-            "Install it with: pip install argostranslate",
+    if not langs:
-            file=sys.stderr,
+        sys.stdout.write("No languages installed.\n")
        sys.stdout.write(
            "Download some with: --download en es pl de fr\n",
        )
-        return 1
+    else:
        sys.stdout.write("Installed languages:\n")
        for code, name in sorted(langs):
            sys.stdout.write(f"  {code}: {name}\n")
    return 0
    # Handle list-languages
    if args.list_languages:
        langs = get_installed_languages()
        if not langs:
            print("No languages installed.")
            print("Download some with: --download en es pl de fr")
        else:
            print("Installed languages:")
            for code, name in sorted(langs):
                print(f"  {code}: {name}")
        return 0
-    # Handle list-available
+def _handle_list_available() -> int:
-    if args.list_available:
+    """Handle --list-available command."""
-        packages = get_available_packages()
+    packages = get_available_packages()
-        if not packages:
+    if not packages:
-            print("No packages available (check internet connection).")
+        sys.stdout.write(
-        else:
+            "No packages available "
-            print("Available language packages:")
+            "(check internet connection).\n",
-            for from_code, from_name, to_code, to_name in sorted(packages):
+        )
-                print(f"  {from_code} ({from_name}) -> {to_code} ({to_name})")
+    else:
-        return 0
+        sys.stdout.write("Available language packages:\n")
        for from_code, from_name, to_code, to_name in sorted(
            packages,
        ):
            sys.stdout.write(
                f"  {from_code} ({from_name})"
                f" -> {to_code} ({to_name})\n",
            )
    return 0
    # Handle download
    if args.download:
        download_results = download_languages(args.download)
        success_count = sum(1 for v in download_results.values() if v)
        print(f"\nDownloaded {success_count}/{len(download_results)} language pairs.")
        return 0 if success_count > 0 else 1
-    # Handle translation
+def _handle_download(lang_codes: list[str]) -> int:
-    words: list[str] = []
+    """Handle --download command."""
    download_results = download_languages(lang_codes)
    success_count = sum(
        1 for v in download_results.values() if v
    )
    sys.stdout.write(
        f"\nDownloaded {success_count}/"
        f"{len(download_results)} language pairs.\n",
    )
    return 0 if success_count > 0 else 1
 def _collect_words(
    args: argparse.Namespace,
 ) -> list[str] | None:
    """Collect words from args. Returns None on error."""
    if args.text:
-        words = [args.text]
+        return [args.text]
-    elif args.words:
+    if args.words:
-        words = args.words
+        return args.words
-    elif args.words_file:
+    if args.words_file:
        try:
            content = read_file(args.words_file)
            words = [w.strip() for w in content.splitlines() if w.strip()]
        except FileNotFoundError:
-            print(f"Error: File not found: {args.words_file}", file=sys.stderr)
+            sys.stderr.write(
-            return 1
+                f"Error: File not found: {args.words_file}\n",
            )
            return None
        return [
            w.strip()
            for w in content.splitlines()
            if w.strip()
        ]
    return []
    if not words:
        parser.print_help()
        return 1
-    # Translate
+def _handle_translation(args: argparse.Namespace) -> int:
    """Handle the translation action."""
    try:
-        results = translate_words_batch(words, args.from_lang, args.to_lang)
+        results = translate_words_batch(
-    except ImportError as e:
+            args.words, args.from_lang, args.to_lang,
-        print(f"Error: {e}", file=sys.stderr)
+        )
    except ImportError:
        logger.exception("Translation import error")
        return 1
    output = format_translations(results)
    # Output
    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
-        print(f"Translations written to {args.output}")
+        sys.stdout.write(
            f"Translations written to {args.output}\n",
        )
    else:
-        print(output)
+        sys.stdout.write(output + "\n")
    # Return error if any translation failed
    if any(not r.success for r in results):
        return 1
    return 0
 def main(argv: Sequence[str] | None = None) -> int:
    """Run the translator command-line interface.

    Parses the arguments, verifies that argostranslate is
    available, then dispatches to the first matching action:
    list installed languages, list available packages,
    download language packs, or translate the collected words.

    Args:
        argv: Command line arguments.

    Returns:
        Exit code.
    """
    cli = _build_parser()
    args = cli.parse_args(argv)

    if not _check_argos():
        missing_dependency_msg = (
            "Error: argostranslate is not installed.\n"
            "Install it with: pip install argostranslate\n"
        )
        sys.stderr.write(missing_dependency_msg)
        return 1

    if args.list_languages:
        return _handle_list_languages()
    if args.list_available:
        return _handle_list_available()
    if args.download:
        return _handle_download(args.download)

    collected = _collect_words(args)
    if collected is None:
        # _collect_words signalled an error; exit without help.
        return 1
    if not collected:
        # No input was supplied at all, so show the usage text.
        cli.print_help()
        return 1

    args.words = collected
    return _handle_translation(args)
 # Script entry point: run the CLI and use main()'s return
 # value as the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
--- a/python_pkg/word_frequency/vocabulary_curve.py
+++ b/python_pkg/word_frequency/vocabulary_curve.py
@ -14,7 +14,9 @@ Usage:
 from __future__ import annotations
 import argparse
 import logging
 from pathlib import Path
 import re
 import sys
 from typing import TYPE_CHECKING, NamedTuple
@ -27,6 +29,9 @@ except ImportError:
    from analyzer import analyze_text, read_file
 logger = logging.getLogger(__name__)
 class ExcerptAnalysis(NamedTuple):
    """Analysis result for an excerpt length."""
@ -111,8 +116,6 @@ def find_optimal_excerpts(
    ranked_words = [word for word, _ in word_counts.most_common()]
    # Extract all words from text (preserving order)
    import re
    all_words = re.findall(r"\b[\w]+\b", text, re.UNICODE)
    if not case_sensitive:
        all_words = [w.lower() for w in all_words]
@ -150,6 +153,9 @@ def find_optimal_excerpts(
    return results
 _MAX_EXCERPT_DISPLAY_LEN = 50
 def format_results(
    results: list[ExcerptAnalysis],
    *,
@ -198,7 +204,7 @@ def format_results(
        if show_excerpts:
            # Truncate long excerpts
            excerpt = r.best_excerpt
-            if len(excerpt) > 50:
+            if len(excerpt) > _MAX_EXCERPT_DISPLAY_LEN:
                excerpt = excerpt[:47] + "..."
            lines.append(f"{r.excerpt_length:>6}  {r.min_vocab_needed:>5}  {excerpt}")
        else:
@ -285,10 +291,7 @@ def main(argv: Sequence[str] | None = None) -> int:
    args = parser.parse_args(argv)
    try:
-        if args.text:
+        text = args.text or read_file(args.file)
            text = args.text
        else:
            text = read_file(args.file)
        results = find_optimal_excerpts(
            text,
@ -304,15 +307,15 @@ def main(argv: Sequence[str] | None = None) -> int:
        if args.output:
            Path(args.output).write_text(output, encoding="utf-8")
-            print(f"Output written to {args.output}")
+            logger.info("Output written to %s", args.output)
        else:
-            print(output)
+            logger.info("%s", output)
-    except FileNotFoundError as e:
+    except FileNotFoundError:
-        print(f"Error: File not found - {e}", file=sys.stderr)
+        logger.exception("File not found")
        return 1
-    except UnicodeDecodeError as e:
+    except UnicodeDecodeError:
-        print(f"Error: Could not decode file - {e}", file=sys.stderr)
+        logger.exception("Could not decode file")
        return 1
    return 0