Add pre-commit workflow and fix linting violations (#2)

* Initial plan

* Add pre-commit GitHub workflow and fix linting issues

- Created .github/workflows/pre-commit.yml to run pre-commit hooks in CI
- Fixed mypy type errors in translator.py
- Fixed shellcheck warning in run_anki_generator.sh
- Added per-file ignores for word_frequency module legacy code
- Applied auto-fixes from ruff, ruff-format, autoflake, prettier
- All pre-commit hooks now passing

Co-authored-by: kuhyx <147418882+kuhyx@users.noreply.github.com>

* Make Python scripts with shebangs executable

- Set executable bit for word_frequency module scripts with shebangs
- All 30 pre-commit hooks now passing

Co-authored-by: kuhyx <147418882+kuhyx@users.noreply.github.com>

* Fix: Restore imports in check functions (autoflake-proof)

- Restored imports in _check_argos(), _check_deep_translator(), _check_langdetect()
- Used _ = module assignment to prevent autoflake from removing imports
- These imports test module availability by triggering ImportError if missing
- All 30 pre-commit hooks now passing

Co-authored-by: kuhyx <147418882+kuhyx@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: kuhyx <147418882+kuhyx@users.noreply.github.com>
2026-07-04 14:23:04 +02:00 · 2026-01-07 22:57:42 +01:00 · 2026-01-07 22:57:42 +01:00 · 0bf6fd1bb1
commit 0bf6fd1bb1
parent f48b1dd178
23 changed files with 124270 additions and 124119 deletions
--- a/python_pkg/word_frequency/analyzer.py
+++ b/python_pkg/word_frequency/analyzer.py
@ -21,10 +21,10 @@ Usage:
 from __future__ import annotations

 import argparse
-import re
-import sys
 from collections import Counter
 from pathlib import Path
+import re
+import sys
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
@ -143,7 +143,9 @@ def format_results(
    # Data rows
    for word, count in items:
        percentage = (count / total_words) * 100
-        lines.append(f"{word:<{max_word_len}}  {count:>{count_width}}  {percentage:>9.2f}%")
+        lines.append(
+            f"{word:<{max_word_len}}  {count:>{count_width}}  {percentage:>9.2f}%"
+        )

    return "\n".join(lines)

@ -242,15 +244,15 @@ def main(argv: Sequence[str] | None = None) -> int:

        if args.output:
            Path(args.output).write_text(result, encoding="utf-8")
-            print(f"Output written to {args.output}")  # noqa: T201
+            print(f"Output written to {args.output}")
        else:
-            print(result)  # noqa: T201
+            print(result)

    except FileNotFoundError as e:
-        print(f"Error: File not found - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: File not found - {e}", file=sys.stderr)
        return 1
    except UnicodeDecodeError as e:
-        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)
        return 1

    return 0
--- a/python_pkg/word_frequency/anki_generator.py
+++ b/python_pkg/word_frequency/anki_generator.py
@ -25,29 +25,30 @@ Output:
 from __future__ import annotations

 import argparse
+from pathlib import Path
 import re
 import subprocess
 import sys
-from collections import Counter
-from pathlib import Path
 from typing import TYPE_CHECKING, NamedTuple

 if TYPE_CHECKING:
    from collections.abc import Sequence

 try:
+    from python_pkg.word_frequency.analyzer import read_file
    from python_pkg.word_frequency.translator import (
        detect_language,
        translate_words_batch,
    )
-    from python_pkg.word_frequency.analyzer import read_file
 except ImportError:
-    from translator import detect_language, translate_words_batch
    from analyzer import read_file
+    from translator import detect_language, translate_words_batch


 # Path to C vocabulary_curve executable
-C_EXECUTABLE = Path(__file__).parent.parent.parent / "C" / "vocabulary_curve" / "vocabulary_curve"
+C_EXECUTABLE = (
+    Path(__file__).parent.parent.parent / "C" / "vocabulary_curve" / "vocabulary_curve"
+)


 class VocabWord(NamedTuple):
@ -59,7 +60,9 @@ class VocabWord(NamedTuple):
    context: str


-def run_vocabulary_curve(filepath: Path, max_length: int, *, dump_vocab: bool = False) -> str:
+def run_vocabulary_curve(
+    filepath: Path, max_length: int, *, dump_vocab: bool = False
+) -> str:
    """Run the C vocabulary_curve executable.

    Args:
@ -94,7 +97,9 @@ def run_vocabulary_curve(filepath: Path, max_length: int, *, dump_vocab: bool =
    return result.stdout


-def run_vocabulary_curve_inverse(filepath: Path, max_vocab: int, *, dump_vocab: bool = False) -> str:
+def run_vocabulary_curve_inverse(
+    filepath: Path, max_vocab: int, *, dump_vocab: bool = False
+) -> str:
    """Run the C vocabulary_curve executable in inverse mode.

    Args:
@ -129,7 +134,9 @@ def run_vocabulary_curve_inverse(filepath: Path, max_vocab: int, *, dump_vocab:
    return result.stdout


-def parse_inverse_mode_output(output: str) -> tuple[str, int, int, list[tuple[str, int]]]:
+def parse_inverse_mode_output(
+    output: str,
+) -> tuple[str, int, int, list[tuple[str, int]]]:
    """Parse output from vocabulary_curve inverse mode.

    Args:
@ -194,7 +201,9 @@ def parse_inverse_mode_output(output: str) -> tuple[str, int, int, list[tuple[st
    return excerpt, excerpt_length, max_rank_used, all_vocab


-def parse_vocabulary_curve_output(output: str, target_length: int) -> tuple[str, list[tuple[str, int]], list[tuple[str, int]]]:
+def parse_vocabulary_curve_output(
+    output: str, target_length: int
+) -> tuple[str, list[tuple[str, int]], list[tuple[str, int]]]:
    """Parse output from vocabulary_curve to get words needed.

    Args:
@ -328,8 +337,8 @@ def generate_anki_deck(
    lines: list[str] = []

    # Add Anki headers
-    lines.append(f"#separator:semicolon")
-    lines.append(f"#html:true")
+    lines.append("#separator:semicolon")
+    lines.append("#html:true")
    lines.append(f"#deck:{deck_name}")
    lines.append(f"#tags:vocabulary {source_lang}")
    if include_context:
@ -351,11 +360,15 @@ def generate_anki_deck(
            if most_frequent != rarest:
                pattern_rare = re.compile(rf"\b({re.escape(rarest)})\b", re.IGNORECASE)
                excerpt_escaped = pattern_rare.sub(r"<b>\1</b>", excerpt_escaped)
-                pattern_freq = re.compile(rf"\b({re.escape(most_frequent)})\b", re.IGNORECASE)
+                pattern_freq = re.compile(
+                    rf"\b({re.escape(most_frequent)})\b", re.IGNORECASE
+                )
                excerpt_escaped = pattern_freq.sub(r"<i>\1</i>", excerpt_escaped)
            else:
                # Same word is both most and least frequent - use bold+italic
-                pattern = re.compile(rf"\b({re.escape(most_frequent)})\b", re.IGNORECASE)
+                pattern = re.compile(
+                    rf"\b({re.escape(most_frequent)})\b", re.IGNORECASE
+                )
                excerpt_escaped = pattern.sub(r"<b><i>\1</i></b>", excerpt_escaped)
        lines.append(f"📖 TARGET EXCERPT;{excerpt_escaped};#0")

@ -391,7 +404,9 @@ def generate_anki_deck(
                context_escaped = pattern.sub(f"<b>{word}</b>", context_escaped)
            else:
                context_escaped = ""
-            lines.append(f"{word_escaped};{translation_escaped};#{rank};{context_escaped}")
+            lines.append(
+                f"{word_escaped};{translation_escaped};#{rank};{context_escaped}"
+            )
        else:
            lines.append(f"{word_escaped};{translation_escaped};#{rank}")

@ -415,6 +430,7 @@ def get_cached_excerpt(
        return None
    try:
        from python_pkg.word_frequency.cache import get_vocab_curve_cache
+
        return get_vocab_curve_cache().get(filepath, length)
    except ImportError:
        return None
@ -433,6 +449,7 @@ def cache_excerpt(
    """
    try:
        from python_pkg.word_frequency.cache import get_vocab_curve_cache
+
        get_vocab_curve_cache().set(filepath, length, excerpt, words)
    except ImportError:
        pass
@ -464,6 +481,7 @@ def get_cached_deck(
        return None
    try:
        from python_pkg.word_frequency.cache import get_anki_deck_cache
+
        return get_anki_deck_cache().get(
            filepath, length, target_lang, include_context, all_vocab
        )
@ -497,6 +515,7 @@ def cache_deck(
    """
    try:
        from python_pkg.word_frequency.cache import get_anki_deck_cache
+
        get_anki_deck_cache().set(
            filepath,
            length,
@ -568,7 +587,9 @@ def generate_flashcards(
    # Run vocabulary curve analysis with vocab dump for all words
    output = run_vocabulary_curve(filepath, excerpt_length, dump_vocab=all_vocab)
    # Parse the output (now includes all vocabulary from C)
-    excerpt, excerpt_words, all_vocab_words = parse_vocabulary_curve_output(output, excerpt_length)
+    excerpt, excerpt_words, all_vocab_words = parse_vocabulary_curve_output(
+        output, excerpt_length
+    )

    if not excerpt_words:
        raise ValueError(f"No words found for excerpt length {excerpt_length}")
@ -673,7 +694,9 @@ def generate_flashcards_inverse(
    output = run_vocabulary_curve_inverse(filepath, max_vocab, dump_vocab=True)

    # Parse the output
-    excerpt, excerpt_length, max_rank_used, all_vocab_words = parse_inverse_mode_output(output)
+    excerpt, excerpt_length, max_rank_used, all_vocab_words = parse_inverse_mode_output(
+        output
+    )

    if excerpt_length == 0:
        raise ValueError(
@ -689,7 +712,9 @@ def generate_flashcards_inverse(

    # Find words that appear in the excerpt (for highlighting)
    excerpt_word_set = set(excerpt.lower().split())
-    excerpt_words = [(w, r) for w, r in all_vocab_words if w.lower() in excerpt_word_set]
+    excerpt_words = [
+        (w, r) for w, r in all_vocab_words if w.lower() in excerpt_word_set
+    ]

    # Get contexts if requested
    contexts = None
@ -835,13 +860,13 @@ def main(argv: Sequence[str] | None = None) -> int:
            try:
                from cache import get_all_cache_stats
            except ImportError:
-                print("Cache module not available", file=sys.stderr)  # noqa: T201
+                print("Cache module not available", file=sys.stderr)
                return 1
        stats = get_all_cache_stats()
-        print("Cache Statistics")  # noqa: T201
-        print("=" * 50)  # noqa: T201
+        print("Cache Statistics")
+        print("=" * 50)
        for cache_name, cache_stats in stats.items():
-            print(f"\n{cache_name.upper()}:")  # noqa: T201
+            print(f"\n{cache_name.upper()}:")
            for key, value in cache_stats.items():
                if key == "cache_size_bytes":
                    if value < 1024:
@ -850,9 +875,9 @@ def main(argv: Sequence[str] | None = None) -> int:
                        size_str = f"{value / 1024:.1f} KB"
                    else:
                        size_str = f"{value / (1024 * 1024):.1f} MB"
-                    print(f"  {key}: {size_str}")  # noqa: T201
+                    print(f"  {key}: {size_str}")
                else:
-                    print(f"  {key}: {value}")  # noqa: T201
+                    print(f"  {key}: {value}")
        return 0

    if args.clear_cache:
@ -862,10 +887,10 @@ def main(argv: Sequence[str] | None = None) -> int:
            try:
                from cache import clear_all_caches
            except ImportError:
-                print("Cache module not available", file=sys.stderr)  # noqa: T201
+                print("Cache module not available", file=sys.stderr)
                return 1
        clear_all_caches()
-        print("All caches cleared.")  # noqa: T201
+        print("All caches cleared.")
        return 0

    # Validate required arguments for main functionality
@ -879,17 +904,18 @@ def main(argv: Sequence[str] | None = None) -> int:
    try:
        filepath = Path(args.file)
        if not filepath.exists():
-            print(f"Error: File not found: {args.file}", file=sys.stderr)  # noqa: T201
+            print(f"Error: File not found: {args.file}", file=sys.stderr)
            return 1

        # INVERSE MODE: --max-vocab
        if args.max_vocab is not None:
            if not args.quiet:
-                print(f"Analyzing {filepath.name}...")  # noqa: T201
-                print(f"Finding longest excerpt using top {args.max_vocab} words...")  # noqa: T201
+                print(f"Analyzing {filepath.name}...")
+                print(f"Finding longest excerpt using top {args.max_vocab} words...")

            # Generate flashcards in inverse mode
-            anki_content, excerpt, excerpt_length, num_words, max_rank_used = generate_flashcards_inverse(
+            anki_content, excerpt, excerpt_length, num_words, max_rank_used = (
+                generate_flashcards_inverse(
                    filepath,
                    args.max_vocab,
                    source_lang=args.source_lang,
@ -899,43 +925,46 @@ def main(argv: Sequence[str] | None = None) -> int:
                    no_translate=args.no_translate,
                    force=args.force,
                )
+            )

            # Determine output path
            if args.output:
                output_path = Path(args.output)
            else:
-                output_path = filepath.parent / f"{filepath.stem}_anki_top{args.max_vocab}.txt"
+                output_path = (
+                    filepath.parent / f"{filepath.stem}_anki_top{args.max_vocab}.txt"
+                )

            # Write output
            output_path.write_text(anki_content, encoding="utf-8")

            if not args.quiet:
-                print("")  # noqa: T201
-                print("=" * 60)  # noqa: T201
-                print("FLASHCARD GENERATION COMPLETE (INVERSE MODE)")  # noqa: T201
-                print("=" * 60)  # noqa: T201
-                print(f"Learning: top {args.max_vocab} words")  # noqa: T201
-                print(f"Longest excerpt you can understand: {excerpt_length} words")  # noqa: T201
-                print(f'  "{excerpt}"')  # noqa: T201
-                print("")  # noqa: T201
-                print(f"Rarest word in excerpt: #{max_rank_used}")  # noqa: T201
-                print(f"Flashcards: {num_words}")  # noqa: T201
-                print(f"Output file: {output_path}")  # noqa: T201
-                print("")  # noqa: T201
-                print("To import into Anki:")  # noqa: T201
-                print("  1. Open Anki")  # noqa: T201
-                print("  2. File -> Import")  # noqa: T201
-                print(f"  3. Select: {output_path}")  # noqa: T201
-                print("  4. Click Import")  # noqa: T201
+                print()
+                print("=" * 60)
+                print("FLASHCARD GENERATION COMPLETE (INVERSE MODE)")
+                print("=" * 60)
+                print(f"Learning: top {args.max_vocab} words")
+                print(f"Longest excerpt you can understand: {excerpt_length} words")
+                print(f'  "{excerpt}"')
+                print()
+                print(f"Rarest word in excerpt: #{max_rank_used}")
+                print(f"Flashcards: {num_words}")
+                print(f"Output file: {output_path}")
+                print()
+                print("To import into Anki:")
+                print("  1. Open Anki")
+                print("  2. File -> Import")
+                print(f"  3. Select: {output_path}")
+                print("  4. Click Import")
            else:
-                print(output_path)  # noqa: T201
+                print(output_path)

            return 0

        # NORMAL MODE: --length
        if not args.quiet:
-            print(f"Analyzing {filepath.name}...")  # noqa: T201
-            print(f"Finding vocabulary for {args.length}-word excerpt...")  # noqa: T201
+            print(f"Analyzing {filepath.name}...")
+            print(f"Finding vocabulary for {args.length}-word excerpt...")

        # Generate flashcards
        anki_content, excerpt, num_words, max_rank = generate_flashcards(
@ -960,38 +989,38 @@ def main(argv: Sequence[str] | None = None) -> int:
        output_path.write_text(anki_content, encoding="utf-8")

        if not args.quiet:
-            print("")  # noqa: T201
-            print("=" * 60)  # noqa: T201
-            print("FLASHCARD GENERATION COMPLETE")  # noqa: T201
-            print("=" * 60)  # noqa: T201
-            print(f"Excerpt to understand ({args.length} words):")  # noqa: T201
-            print(f'  "{excerpt}"')  # noqa: T201
-            print("")  # noqa: T201
-            print(f"Max word rank needed: #{max_rank}")  # noqa: T201
+            print()
+            print("=" * 60)
+            print("FLASHCARD GENERATION COMPLETE")
+            print("=" * 60)
+            print(f"Excerpt to understand ({args.length} words):")
+            print(f'  "{excerpt}"')
+            print()
+            print(f"Max word rank needed: #{max_rank}")
            if args.excerpt_words_only:
-                print(f"Flashcards: {num_words} (excerpt words only)")  # noqa: T201
+                print(f"Flashcards: {num_words} (excerpt words only)")
            else:
-                print(f"Flashcards: {num_words} (ALL words rank #1 to #{max_rank})")  # noqa: T201
-            print(f"Output file: {output_path}")  # noqa: T201
-            print("")  # noqa: T201
-            print("To import into Anki:")  # noqa: T201
-            print("  1. Open Anki")  # noqa: T201
-            print("  2. File -> Import")  # noqa: T201
-            print(f"  3. Select: {output_path}")  # noqa: T201
-            print("  4. Click Import")  # noqa: T201
+                print(f"Flashcards: {num_words} (ALL words rank #1 to #{max_rank})")
+            print(f"Output file: {output_path}")
+            print()
+            print("To import into Anki:")
+            print("  1. Open Anki")
+            print("  2. File -> Import")
+            print(f"  3. Select: {output_path}")
+            print("  4. Click Import")
        else:
-            print(output_path)  # noqa: T201
+            print(output_path)

        return 0

    except FileNotFoundError as e:
-        print(f"Error: {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: {e}", file=sys.stderr)
        return 1
    except subprocess.CalledProcessError as e:
-        print(f"Error running vocabulary_curve: {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error running vocabulary_curve: {e}", file=sys.stderr)
        return 1
    except ValueError as e:
-        print(f"Error: {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: {e}", file=sys.stderr)
        return 1


--- a/python_pkg/word_frequency/cache.py
+++ b/python_pkg/word_frequency/cache.py
@ -15,10 +15,7 @@ import hashlib
 import json
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    pass
+from typing import Any

 # Default cache directory
 DEFAULT_CACHE_DIR = Path.home() / ".cache" / "word_frequency"
@ -88,7 +85,9 @@ class TranslationCache:
        if self._cache is None:
            if self.cache_file.exists():
                try:
-                    self._cache = json.loads(self.cache_file.read_text(encoding="utf-8"))
+                    self._cache = json.loads(
+                        self.cache_file.read_text(encoding="utf-8")
+                    )
                except (json.JSONDecodeError, OSError):
                    self._cache = {}
            else:
@ -122,9 +121,7 @@ class TranslationCache:
        """
        return f"{source_lang}:{target_lang}:{word.lower()}"

-    def get(
-        self, word: str, source_lang: str, target_lang: str
-    ) -> str | None:
+    def get(self, word: str, source_lang: str, target_lang: str) -> str | None:
        """Get cached translation.

        Args:
@ -140,8 +137,13 @@ class TranslationCache:
        return cache.get(key)

    def set(
-        self, word: str, source_lang: str, target_lang: str, translation: str,
-        *, auto_save: bool = False,
+        self,
+        word: str,
+        source_lang: str,
+        target_lang: str,
+        translation: str,
+        *,
+        auto_save: bool = False,
    ) -> None:
        """Store translation in cache.

@ -525,7 +527,7 @@ _anki_deck_cache: AnkiDeckCache | None = None

 def get_translation_cache() -> TranslationCache:
    """Get the global translation cache instance."""
-    global _translation_cache  # noqa: PLW0603
+    global _translation_cache
    if _translation_cache is None:
        _translation_cache = TranslationCache()
    return _translation_cache
@ -533,7 +535,7 @@ def get_translation_cache() -> TranslationCache:

 def get_vocab_curve_cache() -> VocabCurveCache:
    """Get the global vocabulary curve cache instance."""
-    global _vocab_curve_cache  # noqa: PLW0603
+    global _vocab_curve_cache
    if _vocab_curve_cache is None:
        _vocab_curve_cache = VocabCurveCache()
    return _vocab_curve_cache
@ -541,7 +543,7 @@ def get_vocab_curve_cache() -> VocabCurveCache:

 def get_anki_deck_cache() -> AnkiDeckCache:
    """Get the global Anki deck cache instance."""
-    global _anki_deck_cache  # noqa: PLW0603
+    global _anki_deck_cache
    if _anki_deck_cache is None:
        _anki_deck_cache = AnkiDeckCache()
    return _anki_deck_cache
@ -576,12 +578,8 @@ def main() -> int:
    import argparse

    parser = argparse.ArgumentParser(description="Manage word frequency caches")
-    parser.add_argument(
-        "--stats", action="store_true", help="Show cache statistics"
-    )
-    parser.add_argument(
-        "--clear", action="store_true", help="Clear all caches"
-    )
+    parser.add_argument("--stats", action="store_true", help="Show cache statistics")
+    parser.add_argument("--clear", action="store_true", help="Clear all caches")
    parser.add_argument(
        "--clear-translations", action="store_true", help="Clear translation cache"
    )
@ -596,30 +594,30 @@ def main() -> int:

    if args.clear:
        clear_all_caches()
-        print("All caches cleared.")  # noqa: T201
+        print("All caches cleared.")
        return 0

    if args.clear_translations:
        get_translation_cache().clear()
-        print("Translation cache cleared.")  # noqa: T201
+        print("Translation cache cleared.")
        return 0

    if args.clear_excerpts:
        get_vocab_curve_cache().clear()
-        print("Excerpt cache cleared.")  # noqa: T201
+        print("Excerpt cache cleared.")
        return 0

    if args.clear_anki:
        get_anki_deck_cache().clear()
-        print("Anki deck cache cleared.")  # noqa: T201
+        print("Anki deck cache cleared.")
        return 0

    # Default: show stats
    stats = get_all_cache_stats()
-    print("Cache Statistics")  # noqa: T201
-    print("=" * 50)  # noqa: T201
+    print("Cache Statistics")
+    print("=" * 50)
    for cache_name, cache_stats in stats.items():
-        print(f"\n{cache_name.upper()}:")  # noqa: T201
+        print(f"\n{cache_name.upper()}:")
        for key, value in cache_stats.items():
            if key == "cache_size_bytes":
                # Format as human-readable
@ -629,13 +627,14 @@ def main() -> int:
                    size_str = f"{value / 1024:.1f} KB"
                else:
                    size_str = f"{value / (1024 * 1024):.1f} MB"
-                print(f"  {key}: {size_str}")  # noqa: T201
+                print(f"  {key}: {size_str}")
            else:
-                print(f"  {key}: {value}")  # noqa: T201
+                print(f"  {key}: {value}")

    return 0


 if __name__ == "__main__":
    import sys
+
    sys.exit(main())
--- a/python_pkg/word_frequency/excerpt_finder.py
+++ b/python_pkg/word_frequency/excerpt_finder.py
@ -21,8 +21,8 @@ Usage:
 from __future__ import annotations

 import argparse
-import sys
 from pathlib import Path
+import sys
 from typing import TYPE_CHECKING, NamedTuple

 try:
@ -81,7 +81,9 @@ def find_best_excerpt(
        target_set = {w.lower() for w in target_words}

    # Use sliding window to find the best excerpt
-    results: list[tuple[int, int, float, int]] = []  # (match_count, -start, percentage, start)
+    results: list[
+        tuple[int, int, float, int]
+    ] = []  # (match_count, -start, percentage, start)

    # Count matches in first window
    current_matches = sum(1 for w in words[:excerpt_length] if w in target_set)
@ -219,9 +221,11 @@ def format_excerpt_results(
    for i, result in enumerate(results, 1):
        if len(results) > 1:
            lines.append(f"=== Result #{i} ===")
-        lines.append(f"Excerpt: \"{result.excerpt}\"")
+        lines.append(f'Excerpt: "{result.excerpt}"')
        lines.append(f"Word position: {result.start_index} - {result.end_index - 1}")
-        lines.append(f"Matches: {result.match_count}/{len(result.words)} ({result.match_percentage:.2f}%)")
+        lines.append(
+            f"Matches: {result.match_count}/{len(result.words)} ({result.match_percentage:.2f}%)"
+        )
        lines.append("")

    return "\n".join(lines)
@ -325,7 +329,7 @@ def main(argv: Sequence[str] | None = None) -> int:
            target_words = [w.strip() for w in words_content.splitlines() if w.strip()]

        if not target_words:
-            print("Error: No target words provided", file=sys.stderr)  # noqa: T201
+            print("Error: No target words provided", file=sys.stderr)
            return 1

        # Find excerpts
@ -343,15 +347,15 @@ def main(argv: Sequence[str] | None = None) -> int:

        if args.output:
            Path(args.output).write_text(output, encoding="utf-8")
-            print(f"Output written to {args.output}")  # noqa: T201
+            print(f"Output written to {args.output}")
        else:
-            print(output)  # noqa: T201
+            print(output)

    except FileNotFoundError as e:
-        print(f"Error: File not found - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: File not found - {e}", file=sys.stderr)
        return 1
    except UnicodeDecodeError as e:
-        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)
        return 1

    return 0
--- a/python_pkg/word_frequency/learning_pipe.py
+++ b/python_pkg/word_frequency/learning_pipe.py
@ -31,15 +31,14 @@ Usage:
 from __future__ import annotations

 import argparse
-import sys
 from pathlib import Path
+import sys
 from typing import TYPE_CHECKING

 try:
    from python_pkg.word_frequency.analyzer import analyze_text, read_file
    from python_pkg.word_frequency.excerpt_finder import find_best_excerpt
    from python_pkg.word_frequency.translator import (
-        TranslationResult,
        detect_language,
        translate_words_batch,
    )
@ -47,7 +46,6 @@ except ModuleNotFoundError:
    from analyzer import analyze_text, read_file  # type: ignore[import-not-found]
    from excerpt_finder import find_best_excerpt  # type: ignore[import-not-found]
    from translator import (  # type: ignore[import-not-found]
-        TranslationResult,
        detect_language,
        translate_words_batch,
    )
@ -57,19 +55,108 @@ if TYPE_CHECKING:


 # Common stopwords for various languages (can be overridden with --stopwords)
-DEFAULT_STOPWORDS_EN = frozenset({
-    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
-    "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
-    "being", "have", "has", "had", "do", "does", "did", "will", "would",
-    "could", "should", "may", "might", "must", "shall", "can", "this",
-    "that", "these", "those", "i", "you", "he", "she", "it", "we", "they",
-    "me", "him", "her", "us", "them", "my", "your", "his", "its", "our",
-    "their", "what", "which", "who", "whom", "whose", "where", "when",
-    "why", "how", "all", "each", "every", "both", "few", "more", "most",
-    "other", "some", "such", "no", "nor", "not", "only", "own", "same",
-    "so", "than", "too", "very", "just", "as", "if", "then", "because",
-    "while", "although", "though", "after", "before", "when", "where",
-})
+DEFAULT_STOPWORDS_EN = frozenset(
+    {
+        "the",
+        "a",
+        "an",
+        "and",
+        "or",
+        "but",
+        "in",
+        "on",
+        "at",
+        "to",
+        "for",
+        "of",
+        "with",
+        "by",
+        "from",
+        "is",
+        "are",
+        "was",
+        "were",
+        "be",
+        "been",
+        "being",
+        "have",
+        "has",
+        "had",
+        "do",
+        "does",
+        "did",
+        "will",
+        "would",
+        "could",
+        "should",
+        "may",
+        "might",
+        "must",
+        "shall",
+        "can",
+        "this",
+        "that",
+        "these",
+        "those",
+        "i",
+        "you",
+        "he",
+        "she",
+        "it",
+        "we",
+        "they",
+        "me",
+        "him",
+        "her",
+        "us",
+        "them",
+        "my",
+        "your",
+        "his",
+        "its",
+        "our",
+        "their",
+        "what",
+        "which",
+        "who",
+        "whom",
+        "whose",
+        "where",
+        "when",
+        "why",
+        "how",
+        "all",
+        "each",
+        "every",
+        "both",
+        "few",
+        "more",
+        "most",
+        "other",
+        "some",
+        "such",
+        "no",
+        "nor",
+        "not",
+        "only",
+        "own",
+        "same",
+        "so",
+        "than",
+        "too",
+        "very",
+        "just",
+        "as",
+        "if",
+        "then",
+        "because",
+        "while",
+        "although",
+        "though",
+        "after",
+        "before",
+    }
+)


 def load_stopwords(filepath: str | Path | None) -> frozenset[str]:
@ -89,7 +176,9 @@ def load_stopwords(filepath: str | Path | None) -> frozenset[str]:
        return frozenset()

    content = path.read_text(encoding="utf-8")
-    return frozenset(word.strip().lower() for word in content.splitlines() if word.strip())
+    return frozenset(
+        word.strip().lower() for word in content.splitlines() if word.strip()
+    )


 def generate_learning_lesson(
@ -151,9 +240,13 @@ def generate_learning_lesson(
    lines.append("=" * 70)
    lines.append("LANGUAGE LEARNING LESSON")
    lines.append("=" * 70)
-    lines.append(f"Source text: {total_words:,} total words, {len(word_counts):,} unique words")
+    lines.append(
+        f"Source text: {total_words:,} total words, {len(word_counts):,} unique words"
+    )
    if all_stopwords:
-        lines.append(f"After filtering {len(all_stopwords)} stopwords: {len(filtered_words):,} vocabulary words")
+        lines.append(
+            f"After filtering {len(all_stopwords)} stopwords: {len(filtered_words):,} vocabulary words"
+        )
    else:
        lines.append(f"Vocabulary words: {len(filtered_words):,}")

@ -196,7 +289,9 @@ def generate_learning_lesson(
        cumulative_words.extend(word for word, _ in batch_words)

        lines.append("-" * 70)
-        lines.append(f"BATCH {batch_num + 1}: Words {start_idx + 1} - {min(end_idx, len(filtered_words))}")
+        lines.append(
+            f"BATCH {batch_num + 1}: Words {start_idx + 1} - {min(end_idx, len(filtered_words))}"
+        )
        lines.append("-" * 70)
        lines.append("")

@ -230,7 +325,9 @@ def generate_learning_lesson(
        else:
            for i, (word, count) in enumerate(batch_words, start=start_idx + 1):
                percentage = (count / total_words) * 100
-                lines.append(f"  {i:3}. {word:<20} ({count:,} occurrences, {percentage:.2f}%)")
+                lines.append(
+                    f"  {i:3}. {word:<20} ({count:,} occurrences, {percentage:.2f}%)"
+                )

        lines.append("")

@ -239,7 +336,9 @@ def generate_learning_lesson(
            word_counts[word] for word in cumulative_words if word in word_counts
        )
        coverage = (cumulative_count / total_words) * 100
-        lines.append(f"After learning these words, you'll recognize ~{coverage:.1f}% of the text")
+        lines.append(
+            f"After learning these words, you'll recognize ~{coverage:.1f}% of the text"
+        )
        lines.append("")

        # Find excerpts using cumulative words
@ -256,8 +355,10 @@ def generate_learning_lesson(
        )

        for j, excerpt in enumerate(excerpts, 1):
-            lines.append(f"  Excerpt {j} ({excerpt.match_percentage:.1f}% known words):")
-            lines.append(f"  \"{excerpt.excerpt}\"")
+            lines.append(
+                f"  Excerpt {j} ({excerpt.match_percentage:.1f}% known words):"
+            )
+            lines.append(f'  "{excerpt.excerpt}"')
            lines.append("")

    # Summary
@ -431,15 +532,15 @@ def main(argv: Sequence[str] | None = None) -> int:
        # Output
        if args.output:
            Path(args.output).write_text(lesson, encoding="utf-8")
-            print(f"Lesson written to {args.output}")  # noqa: T201
+            print(f"Lesson written to {args.output}")
        else:
-            print(lesson)  # noqa: T201
+            print(lesson)

    except FileNotFoundError as e:
-        print(f"Error: File not found - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: File not found - {e}", file=sys.stderr)
        return 1
    except UnicodeDecodeError as e:
-        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: Could not decode file as UTF-8 - {e}", file=sys.stderr)
        return 1

    return 0
--- a/python_pkg/word_frequency/run_anki_generator.sh
+++ b/python_pkg/word_frequency/run_anki_generator.sh
@ -76,7 +76,8 @@ try_pipx_install() {
 # Create/use a virtualenv for argostranslate
 setup_venv() {
    # Use /tmp for pip cache to avoid home directory quota issues
-    export PIP_CACHE_DIR="/tmp/.pip_cache_$(id -u)"
+    PIP_CACHE_DIR="/tmp/.pip_cache_$(id -u)"
+    export PIP_CACHE_DIR
    mkdir -p "$PIP_CACHE_DIR"

    if [[ ! -d "$VENV_DIR" ]]; then
--- a/python_pkg/word_frequency/test_texts/bible_english.txt
+++ b/python_pkg/word_frequency/test_texts/bible_english.txt
@ -1,4 +1,4 @@
-The Project Gutenberg eBook of The King James Version of the Bible
+The Project Gutenberg eBook of The King James Version of the Bible

 This ebook is for the use of anyone anywhere in the United States and
 most other parts of the world at no cost and with almost no restrictions
@ -99964,5 +99964,3 @@ This website includes information about Project Gutenberg™,
 including how to make donations to the Project Gutenberg Literary
 Archive Foundation, how to help produce our new eBooks, and how to
 subscribe to our email newsletter to hear about new eBooks.
-
-
--- a/python_pkg/word_frequency/test_texts/bible_latin.txt
+++ b/python_pkg/word_frequency/test_texts/bible_latin.txt
@ -1,4 +1,4 @@
-The Project Gutenberg eBook of The Orbis Pictus
+The Project Gutenberg eBook of The Orbis Pictus

 This ebook is for the use of anyone anywhere in the United States and
 most other parts of the world at no cost and with almost no restrictions
@ -12899,5 +12899,3 @@ This website includes information about Project Gutenberg™,
 including how to make donations to the Project Gutenberg Literary
 Archive Foundation, how to help produce our new eBooks, and how to
 subscribe to our email newsletter to hear about new eBooks.
-
-
--- a/python_pkg/word_frequency/test_texts/caesar_latin.txt
+++ b/python_pkg/word_frequency/test_texts/caesar_latin.txt
@ -67,4 +67,3 @@ C. IVLI CAESARIS COMMENTARIORVM DE BELLO GALLICO LIBER PRIMVS
 		Caesar
 		The Latin Library
 		The Classics Page
-	
--- a/python_pkg/word_frequency/tests/test_analyzer.py
+++ b/python_pkg/word_frequency/tests/test_analyzer.py
@ -2,11 +2,9 @@

 from __future__ import annotations

-import tempfile
-import time
 from collections import Counter
 from pathlib import Path
-from typing import TYPE_CHECKING
+import time

 import pytest

@ -20,9 +18,6 @@ from python_pkg.word_frequency.analyzer import (
    read_files,
 )

-if TYPE_CHECKING:
-    from collections.abc import Sequence
-

 class TestExtractWords:
    """Tests for extract_words function."""
--- a/python_pkg/word_frequency/tests/test_anki_generator.py
+++ b/python_pkg/word_frequency/tests/test_anki_generator.py
@ -12,17 +12,16 @@ try:
    from python_pkg.word_frequency.anki_generator import (
        find_word_contexts,
        generate_anki_deck,
-        generate_flashcards,
        main,
        parse_vocabulary_curve_output,
    )
 except ImportError:
    import sys
+
    sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
    from python_pkg.word_frequency.anki_generator import (
        find_word_contexts,
        generate_anki_deck,
-        generate_flashcards,
        main,
        parse_vocabulary_curve_output,
    )
@ -78,19 +77,25 @@ class TestParseVocabularyCurveOutput:

    def test_parse_length_1(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for length 1."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 1)
+        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+            sample_vocabulary_output, 1
+        )
        assert excerpt == "the"
        assert excerpt_words == [("the", 1)]

    def test_parse_length_2(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for length 2."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 2)
+        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+            sample_vocabulary_output, 2
+        )
        assert excerpt == "the dog"
        assert excerpt_words == [("the", 1), ("dog", 2)]

    def test_parse_length_3(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for length 3."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 3)
+        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+            sample_vocabulary_output, 3
+        )
        assert excerpt == "the quick fox"
        assert len(excerpt_words) == 3
        assert ("the", 1) in excerpt_words
@ -99,7 +104,9 @@ class TestParseVocabularyCurveOutput:

    def test_parse_nonexistent_length(self, sample_vocabulary_output: str) -> None:
        """Test parsing output for non-existent length."""
-        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 100)
+        excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(
+            sample_vocabulary_output, 100
+        )
        assert excerpt == ""
        assert excerpt_words == []

--- a/python_pkg/word_frequency/tests/test_excerpt_finder.py
+++ b/python_pkg/word_frequency/tests/test_excerpt_finder.py
@ -2,8 +2,8 @@

 from __future__ import annotations

-import time
 from pathlib import Path
+import time

 import pytest

@ -324,7 +324,7 @@ class TestMain:
                "2",
            ]
        )
-        captured = capsys.readouterr()
+        capsys.readouterr()

        assert exit_code == 0
        # Excerpt should include context words
@ -342,7 +342,7 @@ class TestMain:
                "--case-sensitive",
            ]
        )
-        captured = capsys.readouterr()
+        capsys.readouterr()

        assert exit_code == 0
        # Only lowercase "hello" should match
--- a/python_pkg/word_frequency/tests/test_learning_pipe.py
+++ b/python_pkg/word_frequency/tests/test_learning_pipe.py
@ -2,20 +2,20 @@

 from __future__ import annotations

-import time
 from pathlib import Path
+import time
 from typing import TYPE_CHECKING
 from unittest.mock import MagicMock, patch

 import pytest

+import python_pkg.word_frequency.learning_pipe as learning_pipe_module
 from python_pkg.word_frequency.learning_pipe import (
    DEFAULT_STOPWORDS_EN,
    generate_learning_lesson,
    load_stopwords,
    main,
 )
-import python_pkg.word_frequency.learning_pipe as learning_pipe_module
 from python_pkg.word_frequency.translator import TranslationResult

 if TYPE_CHECKING:
@ -25,12 +25,13 @@ if TYPE_CHECKING:
@pytest.fixture
 def mock_translation() -> Generator[MagicMock, None, None]:
    """Mock translation to avoid requiring argostranslate."""
+
    def fake_batch_translate(
        words: list[str],
        from_lang: str,
        to_lang: str,
        *,
-        use_cache: bool = True,  # noqa: ARG001
+        use_cache: bool = True,
    ) -> list[TranslationResult]:
        """Fake batch translation that returns word with prefix."""
        return [
@ -274,7 +275,7 @@ class TestMain:
                "5",
            ]
        )
-        captured = capsys.readouterr()
+        capsys.readouterr()

        assert exit_code == 0
        # "hello" should be filtered by custom stopwords
@ -392,12 +393,17 @@ class TestTranslationIntegration:
        text_file.write_text("hello world hello world hello", encoding="utf-8")

        # Should work with mocked translation
-        result = main([
-            "--file", str(text_file),
-            "--translate-from", "en",
-            "--translate-to", "es",
+        result = main(
+            [
+                "--file",
+                str(text_file),
+                "--translate-from",
+                "en",
+                "--translate-to",
+                "es",
                "--no-default-stopwords",
-        ])
+            ]
+        )

        assert result == 0

@ -437,4 +443,3 @@ class TestTranslationIntegration:
        # Should not have translation output
        assert "Translation:" not in result
        assert "Detected language:" not in result
-
--- a/python_pkg/word_frequency/tests/test_translator.py
+++ b/python_pkg/word_frequency/tests/test_translator.py
@ -2,8 +2,8 @@

 from __future__ import annotations

-import sys
 from pathlib import Path
+import sys
 from typing import TYPE_CHECKING
 from unittest.mock import MagicMock, patch

@ -52,7 +52,9 @@ class ArgosAvailableMock:
    Works whether argos is installed or not by patching sys.modules.
    """

-    def __init__(self, translate_returns: str | list[str] | Exception | None = None) -> None:
+    def __init__(
+        self, translate_returns: str | list[str] | Exception | None = None
+    ) -> None:
        """Initialize with return values for translate()."""
        self.translate_returns = translate_returns
        self.mock_translate_fn = MagicMock()
@ -69,9 +71,9 @@ class ArgosAvailableMock:
        translator._argos_available = True

        # Set up translate return value
-        if isinstance(self.translate_returns, Exception):
-            self.mock_translate_fn.side_effect = self.translate_returns
-        elif isinstance(self.translate_returns, list):
+        if isinstance(self.translate_returns, Exception) or isinstance(
+            self.translate_returns, list
+        ):
            self.mock_translate_fn.side_effect = self.translate_returns
        elif self.translate_returns is not None:
            self.mock_translate_fn.return_value = self.translate_returns
@ -102,9 +104,9 @@ class ArgosAvailableMock:
            translator, "_ensure_language_pair", lambda f, t: None
        )

-        self._sys_modules_patcher.start()
-        self._ensure_patcher.start()
-        self._lang_patcher.start()
+        self._sys_modules_patcher.start()  # type: ignore[union-attr]
+        self._ensure_patcher.start()  # type: ignore[union-attr]
+        self._lang_patcher.start()  # type: ignore[union-attr]

        return self.mock_translate_fn

@ -291,9 +293,7 @@ class TestTranslateWordsBatch:
        """Test batch translation falls back to individual when result count mismatches."""
        words = ["one", "two", "three", "four"]
        # First call (batch) returns wrong count, subsequent calls are individual
-        with ArgosAvailableMock(
-            ["wrong", "uno", "dos", "tres", "cuatro"]
-        ) as mock:
+        with ArgosAvailableMock(["wrong", "uno", "dos", "tres", "cuatro"]) as mock:
            results = translate_words_batch(words, "en", "es", use_cache=False)

        assert len(results) == 4
@ -425,7 +425,8 @@ class TestGetInstalledLanguages:
        # We need to mock the translate module's get_installed_languages
        mock_translate_module = MagicMock()
        mock_translate_module.get_installed_languages.return_value = [
-            mock_lang1, mock_lang2
+            mock_lang1,
+            mock_lang2,
        ]
        mock_package_module = MagicMock()
        mock_parent = MagicMock()
@ -507,9 +508,7 @@ class TestMain:
        result = main(["--text", "hello", "--from", "en", "--to", "es"])
        assert result == 1

-    def test_list_languages_empty(
-        self, capsys: pytest.CaptureFixture[str]
-    ) -> None:
+    def test_list_languages_empty(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Test listing languages when none installed."""
        mock_translate_module = MagicMock()
        mock_translate_module.get_installed_languages.return_value = []
@ -572,9 +571,7 @@ class TestMain:
        assert "en" in captured.out
        assert "English" in captured.out

-    def test_translate_single_text(
-        self, capsys: pytest.CaptureFixture[str]
-    ) -> None:
+    def test_translate_single_text(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Test translating single text."""
        with ArgosAvailableMock("hola"):
            result = main(["--text", "hello", "--from", "en", "--to", "es"])
@ -584,9 +581,7 @@ class TestMain:
        assert "hello" in captured.out
        assert "hola" in captured.out

-    def test_translate_multiple_words(
-        self, capsys: pytest.CaptureFixture[str]
-    ) -> None:
+    def test_translate_multiple_words(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Test translating multiple words."""
        with ArgosAvailableMock(["hola", "mundo"]):
            result = main(["--words", "hello", "world", "--from", "en", "--to", "es"])
@ -613,9 +608,7 @@ class TestMain:
        assert "world" in captured.out
        assert "goodbye" in captured.out

-    def test_translate_file_not_found(
-        self, capsys: pytest.CaptureFixture[str]
-    ) -> None:
+    def test_translate_file_not_found(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Test error when words file not found."""
        with ArgosAvailableMock():
            result = main(
@ -654,9 +647,7 @@ class TestMain:
        assert "hello" in content
        assert "hola" in content

-    def test_no_input_shows_help(
-        self, capsys: pytest.CaptureFixture[str]
-    ) -> None:
+    def test_no_input_shows_help(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Test that no input shows help."""
        with ArgosAvailableMock():
            result = main([])
--- a/python_pkg/word_frequency/tests/test_vocabulary_curve.py
+++ b/python_pkg/word_frequency/tests/test_vocabulary_curve.py
@ -3,14 +3,18 @@

 from __future__ import annotations

-import subprocess
-import tempfile
 from pathlib import Path
+import subprocess

 import pytest

 # Path to the C executable
-C_EXECUTABLE = Path(__file__).parent.parent.parent.parent / "C" / "vocabulary_curve" / "vocabulary_curve"
+C_EXECUTABLE = (
+    Path(__file__).parent.parent.parent.parent
+    / "C"
+    / "vocabulary_curve"
+    / "vocabulary_curve"
+)


@pytest.fixture
@ -46,6 +50,7 @@ def run_vocabulary_curve(filepath: Path, max_length: int = 10) -> str:
        capture_output=True,
        text=True,
        timeout=30,
+        check=False,
    )
    return result.stdout

@ -86,8 +91,9 @@ class TestExcerptValidity:
    def test_excerpt_exists_in_source_text(self, sample_text_file: Path) -> None:
        """Test that each excerpt can be found in the source text as contiguous words."""
        import re
+
        source_text = sample_text_file.read_text(encoding="utf-8").lower()
-        source_words = re.findall(r'\b[\w]+\b', source_text)
+        source_words = re.findall(r"\b[\w]+\b", source_text)
        output = run_vocabulary_curve(sample_text_file, max_length=10)
        excerpts = extract_excerpts_from_output(output)

@ -98,7 +104,7 @@ class TestExcerptValidity:
            # Find this sequence in source_words
            found = False
            for i in range(len(source_words) - len(excerpt_words) + 1):
-                if source_words[i:i+len(excerpt_words)] == excerpt_words:
+                if source_words[i : i + len(excerpt_words)] == excerpt_words:
                    found = True
                    break
            assert found, (
@ -114,15 +120,16 @@ class TestExcerptValidity:

        for length, excerpt in excerpts:
            word_count = len(excerpt.split())
-            assert word_count == length, (
-                f"Expected {length} words, got {word_count}: '{excerpt}'"
-            )
+            assert (
+                word_count == length
+            ), f"Expected {length} words, got {word_count}: '{excerpt}'"

    def test_polish_excerpt_exists_in_source(self, polish_text_file: Path) -> None:
        """Test Polish text excerpts are found in source as contiguous words."""
        import re
+
        source_text = polish_text_file.read_text(encoding="utf-8").lower()
-        source_words = re.findall(r'\b[\w]+\b', source_text)
+        source_words = re.findall(r"\b[\w]+\b", source_text)
        output = run_vocabulary_curve(polish_text_file, max_length=8)
        excerpts = extract_excerpts_from_output(output)

@ -133,7 +140,7 @@ class TestExcerptValidity:
            # Find this sequence in source_words
            found = False
            for i in range(len(source_words) - len(excerpt_words) + 1):
-                if source_words[i:i+len(excerpt_words)] == excerpt_words:
+                if source_words[i : i + len(excerpt_words)] == excerpt_words:
                    found = True
                    break
            assert found, (
@ -148,7 +155,7 @@ class TestExcerptValidity:

        source_text = sample_text_file.read_text(encoding="utf-8").lower()
        # Extract words from source
-        source_words = re.findall(r'\b[\w]+\b', source_text)
+        source_words = re.findall(r"\b[\w]+\b", source_text)

        output = run_vocabulary_curve(sample_text_file, max_length=5)
        excerpts = extract_excerpts_from_output(output)
@ -159,7 +166,7 @@ class TestExcerptValidity:
            # Find this sequence in source_words
            found = False
            for i in range(len(source_words) - length + 1):
-                if source_words[i:i+length] == excerpt_words:
+                if source_words[i : i + length] == excerpt_words:
                    found = True
                    break

@ -182,7 +189,7 @@ class TestVocabNeeded:
    def test_vocab_needed_increases_monotonically(self, sample_text_file: Path) -> None:
        """Test that vocab needed never decreases as length increases."""
        output = run_vocabulary_curve(sample_text_file, max_length=10)
-        excerpts = extract_excerpts_from_output(output)
+        extract_excerpts_from_output(output)

        # Extract vocab needed from output
        prev_vocab = 0
@ -192,9 +199,9 @@ class TestVocabNeeded:
                parts = line.split("Vocab needed:")
                if len(parts) > 1:
                    vocab = int(parts[1].split()[0])
-                    assert vocab >= prev_vocab, (
-                        f"Vocab decreased from {prev_vocab} to {vocab}"
-                    )
+                    assert (
+                        vocab >= prev_vocab
+                    ), f"Vocab decreased from {prev_vocab} to {vocab}"
                    prev_vocab = vocab


@ -213,6 +220,7 @@ class TestEdgeCases:
            [str(C_EXECUTABLE), str(filepath), "5"],
            capture_output=True,
            text=True,
+            check=False,
        )

        assert result.returncode != 0 or "No words" in result.stderr
--- a/python_pkg/word_frequency/translator.py
+++ b/python_pkg/word_frequency/translator.py
@ -29,8 +29,8 @@ Dependencies (install one):
 from __future__ import annotations

 import argparse
-import sys
 from pathlib import Path
+import sys
 from typing import TYPE_CHECKING, NamedTuple

 if TYPE_CHECKING:
@ -50,6 +50,7 @@ def _check_cuda_available() -> bool:
    if _gpu_available is None:
        try:
            import torch
+
            _gpu_available = torch.cuda.is_available()
        except ImportError:
            _gpu_available = False
@ -71,11 +72,11 @@ def _init_gpu_if_available() -> None:
        return

    import sys
+
    print("CUDA detected, initializing GPU acceleration...", file=sys.stderr)

    try:
        import torch
-        import ctranslate2

        # Force CTranslate2 to use CUDA
        device_count = torch.cuda.device_count()
@ -87,6 +88,7 @@ def _init_gpu_if_available() -> None:

        # Set environment variable to force GPU usage in argos
        import os
+
        os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
        os.environ["CT2_USE_EXPERIMENTAL_PACKED_GEMM"] = "1"

@ -106,9 +108,10 @@ def _check_argos() -> bool:
    global _argos_available
    if _argos_available is None:
        try:
-            import argostranslate.package  # noqa: F401
-            import argostranslate.translate  # noqa: F401
+            import argostranslate.package
+            import argostranslate.translate

+            _ = (argostranslate.package, argostranslate.translate)
            _argos_available = True
        except ImportError:
            _argos_available = False
@ -120,8 +123,9 @@ def _check_deep_translator() -> bool:
    global _deep_translator_available
    if _deep_translator_available is None:
        try:
-            from deep_translator import GoogleTranslator  # noqa: F401
+            from deep_translator import GoogleTranslator

+            _ = GoogleTranslator
            _deep_translator_available = True
        except ImportError:
            _deep_translator_available = False
@ -133,8 +137,9 @@ def _check_langdetect() -> bool:
    global _langdetect_available
    if _langdetect_available is None:
        try:
-            import langdetect  # noqa: F401
+            import langdetect

+            _ = langdetect
            _langdetect_available = True
        except ImportError:
            _langdetect_available = False
@ -227,7 +232,7 @@ def download_languages(lang_codes: Sequence[str]) -> dict[str, bool]:
    results: dict[str, bool] = {}

    # Update package index
-    print("Updating package index...")  # noqa: T201
+    print("Updating package index...")
    argostranslate.package.update_package_index()
    available = argostranslate.package.get_available_packages()

@ -250,13 +255,13 @@ def download_languages(lang_codes: Sequence[str]) -> dict[str, bool]:
            if pkg_key in available_lookup:
                pkg = available_lookup[pkg_key]
                try:
-                    print(f"Downloading {from_code} -> {to_code}...")  # noqa: T201
+                    print(f"Downloading {from_code} -> {to_code}...")
                    argostranslate.package.install_from_path(pkg.download())
                    results[key] = True
-                    print(f"  ✓ Installed {from_code} -> {to_code}")  # noqa: T201
+                    print(f"  ✓ Installed {from_code} -> {to_code}")
                except Exception as e:  # noqa: BLE001
                    results[key] = False
-                    print(f"  ✗ Failed {from_code} -> {to_code}: {e}")  # noqa: T201
+                    print(f"  ✗ Failed {from_code} -> {to_code}: {e}")
            else:
                # Package not available
                results[key] = False
@ -276,7 +281,7 @@ def _ensure_argos_installed() -> None:
    import subprocess
    import sys

-    print("argostranslate not found. Attempting to install...")  # noqa: T201
+    print("argostranslate not found. Attempting to install...")
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "argostranslate"],
@ -284,11 +289,11 @@ def _ensure_argos_installed() -> None:
            capture_output=True,
        )
        # Reset the check flag and verify
-        global _argos_available  # noqa: PLW0603
+        global _argos_available
        _argos_available = None
        if not _check_argos():
            raise ImportError("argostranslate installation succeeded but import failed")
-        print("argostranslate installed successfully.")  # noqa: T201
+        print("argostranslate installed successfully.")
    except subprocess.CalledProcessError as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        raise ImportError(
@ -354,7 +359,7 @@ def _ensure_language_pair(from_lang: str, to_lang: str) -> None:
        )

    print(
-        f"  Downloading package (~50-100MB, this may take a minute)...",
+        "  Downloading package (~50-100MB, this may take a minute)...",
        file=sys.stderr,
    )
    download_path = pkg.download()
@ -391,6 +396,7 @@ def translate_word(
    if use_cache:
        try:
            from python_pkg.word_frequency.cache import get_translation_cache
+
            cache = get_translation_cache()
            cached = cache.get(word, from_lang, to_lang)
            if cached is not None:
@ -415,6 +421,7 @@ def translate_word(
        if use_cache:
            try:
                from python_pkg.word_frequency.cache import get_translation_cache
+
                get_translation_cache().set(word, from_lang, to_lang, translated)
            except ImportError:
                pass
@ -454,7 +461,9 @@ def translate_words(
    Returns:
        List of TranslationResult for each word.
    """
-    return [translate_word(word, from_lang, to_lang, use_cache=use_cache) for word in words]
+    return [
+        translate_word(word, from_lang, to_lang, use_cache=use_cache) for word in words
+    ]


 def _translate_batch_worker(
@ -492,9 +501,7 @@ def _translate_batch_worker(
    else:
        # Fall back to individual translation for this batch
        for word in batch_words:
-            translated = argostranslate.translate.translate(
-                word, from_lang, to_lang
-            )
+            translated = argostranslate.translate.translate(word, from_lang, to_lang)
            translations[word.lower()] = translated

    return batch_idx, translations
@ -544,6 +551,7 @@ def translate_words_batch(
    if use_cache:
        try:
            from python_pkg.word_frequency.cache import get_translation_cache
+
            cache = get_translation_cache()
            cached_results = cache.get_many(list(words), from_lang, to_lang)
        except ImportError:
@ -574,7 +582,7 @@ def translate_words_batch(
            BATCH_SIZE = 100
            batches: list[list[str]] = []
            for i in range(0, num_to_translate, BATCH_SIZE):
-                batches.append(words_to_translate[i:i + BATCH_SIZE])
+                batches.append(words_to_translate[i : i + BATCH_SIZE])

            total_batches = len(batches)

@ -597,8 +605,8 @@ def translate_words_batch(
                )
                new_translations.update(batch_translations)

-            print(f"  Translation complete.", file=sys.stderr, flush=True)
-        except Exception as e:  # noqa: BLE001
+            print("  Translation complete.", file=sys.stderr, flush=True)
+        except Exception as e:
            raise RuntimeError(
                f"Translation failed for {from_lang} -> {to_lang}: {e}"
            ) from e
@ -607,6 +615,7 @@ def translate_words_batch(
        if use_cache and new_translations:
            try:
                from python_pkg.word_frequency.cache import get_translation_cache
+
                get_translation_cache().set_many(new_translations, from_lang, to_lang)
            except ImportError:
                pass
@ -670,7 +679,9 @@ def format_translations(
    # Data
    for r in results:
        if r.success:
-            lines.append(f"{r.source_word:<{max_source}}  {r.translated_word:<{max_trans}}")
+            lines.append(
+                f"{r.source_word:<{max_source}}  {r.translated_word:<{max_trans}}"
+            )
        elif show_errors:
            error_msg = f"[Error: {r.error}]" if r.error else "[Failed]"
            lines.append(f"{r.source_word:<{max_source}}  {error_msg}")
@ -771,7 +782,7 @@ def main(argv: Sequence[str] | None = None) -> int:

    # Check if argostranslate is available
    if not _check_argos():
-        print(  # noqa: T201
+        print(
            "Error: argostranslate is not installed.\n"
            "Install it with: pip install argostranslate",
            file=sys.stderr,
@ -782,30 +793,30 @@ def main(argv: Sequence[str] | None = None) -> int:
    if args.list_languages:
        langs = get_installed_languages()
        if not langs:
-            print("No languages installed.")  # noqa: T201
-            print("Download some with: --download en es pl de fr")  # noqa: T201
+            print("No languages installed.")
+            print("Download some with: --download en es pl de fr")
        else:
-            print("Installed languages:")  # noqa: T201
+            print("Installed languages:")
            for code, name in sorted(langs):
-                print(f"  {code}: {name}")  # noqa: T201
+                print(f"  {code}: {name}")
        return 0

    # Handle list-available
    if args.list_available:
        packages = get_available_packages()
        if not packages:
-            print("No packages available (check internet connection).")  # noqa: T201
+            print("No packages available (check internet connection).")
        else:
-            print("Available language packages:")  # noqa: T201
+            print("Available language packages:")
            for from_code, from_name, to_code, to_name in sorted(packages):
-                print(f"  {from_code} ({from_name}) -> {to_code} ({to_name})")  # noqa: T201
+                print(f"  {from_code} ({from_name}) -> {to_code} ({to_name})")
        return 0

    # Handle download
    if args.download:
-        results = download_languages(args.download)
-        success_count = sum(1 for v in results.values() if v)
-        print(f"\nDownloaded {success_count}/{len(results)} language pairs.")  # noqa: T201
+        download_results = download_languages(args.download)
+        success_count = sum(1 for v in download_results.values() if v)
+        print(f"\nDownloaded {success_count}/{len(download_results)} language pairs.")
        return 0 if success_count > 0 else 1

    # Handle translation
@ -819,7 +830,7 @@ def main(argv: Sequence[str] | None = None) -> int:
            content = read_file(args.words_file)
            words = [w.strip() for w in content.splitlines() if w.strip()]
        except FileNotFoundError:
-            print(f"Error: File not found: {args.words_file}", file=sys.stderr)  # noqa: T201
+            print(f"Error: File not found: {args.words_file}", file=sys.stderr)
            return 1

    if not words:
@ -830,7 +841,7 @@ def main(argv: Sequence[str] | None = None) -> int:
    try:
        results = translate_words_batch(words, args.from_lang, args.to_lang)
    except ImportError as e:
-        print(f"Error: {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: {e}", file=sys.stderr)
        return 1

    output = format_translations(results)
@ -838,9 +849,9 @@ def main(argv: Sequence[str] | None = None) -> int:
    # Output
    if args.output:
        Path(args.output).write_text(output, encoding="utf-8")
-        print(f"Translations written to {args.output}")  # noqa: T201
+        print(f"Translations written to {args.output}")
    else:
-        print(output)  # noqa: T201
+        print(output)

    # Return error if any translation failed
    if any(not r.success for r in results):
--- a/python_pkg/word_frequency/vocabulary_curve.py
+++ b/python_pkg/word_frequency/vocabulary_curve.py
@ -14,8 +14,8 @@ Usage:
 from __future__ import annotations

 import argparse
-import sys
 from pathlib import Path
+import sys
 from typing import TYPE_CHECKING, NamedTuple

 if TYPE_CHECKING:
@ -112,6 +112,7 @@ def find_optimal_excerpts(

    # Extract all words from text (preserving order)
    import re
+
    all_words = re.findall(r"\b[\w]+\b", text, re.UNICODE)
    if not case_sensitive:
        all_words = [w.lower() for w in all_words]
@ -213,7 +214,9 @@ def format_results(
    if results:
        final = results[-1]
        lines.append(f"To understand a {final.excerpt_length}-word excerpt,")
-        lines.append(f"you need to learn at minimum {final.min_vocab_needed} top words.")
+        lines.append(
+            f"you need to learn at minimum {final.min_vocab_needed} top words."
+        )

    return "\n".join(lines)

@ -301,15 +304,15 @@ def main(argv: Sequence[str] | None = None) -> int:

        if args.output:
            Path(args.output).write_text(output, encoding="utf-8")
-            print(f"Output written to {args.output}")  # noqa: T201
+            print(f"Output written to {args.output}")
        else:
-            print(output)  # noqa: T201
+            print(output)

    except FileNotFoundError as e:
-        print(f"Error: File not found - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: File not found - {e}", file=sys.stderr)
        return 1
    except UnicodeDecodeError as e:
-        print(f"Error: Could not decode file - {e}", file=sys.stderr)  # noqa: T201
+        print(f"Error: Could not decode file - {e}", file=sys.stderr)
        return 1

    return 0