testsAndMisc-archive/python_pkg/word_frequency/tests/test_generation.py

372 lines
13 KiB
Python
Raw Normal View History

"""Tests for word_frequency._generation module."""
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from python_pkg.word_frequency._generation import (
_detect_source_language,
cache_deck,
cache_excerpt,
generate_flashcards,
get_cached_deck,
get_cached_excerpt,
run_vocabulary_curve,
run_vocabulary_curve_inverse,
)
from python_pkg.word_frequency._types import FlashcardOptions
from python_pkg.word_frequency.cache import AnkiDeckKey
class TestRunVocabularyCurve:
"""Tests for run_vocabulary_curve."""
def test_executable_not_found(self, tmp_path: Path) -> None:
with (
patch(
"python_pkg.word_frequency._generation.C_EXECUTABLE",
tmp_path / "nonexistent",
),
pytest.raises(FileNotFoundError, match="C executable not found"),
):
run_vocabulary_curve(tmp_path / "text.txt", 10)
def test_success(self, tmp_path: Path) -> None:
exe = tmp_path / "vocab_curve"
exe.write_text("", encoding="utf-8")
with (
patch("python_pkg.word_frequency._generation.C_EXECUTABLE", exe),
patch("python_pkg.word_frequency._generation.subprocess.run") as mock_run,
):
mock_run.return_value = MagicMock(stdout="output")
result = run_vocabulary_curve(tmp_path / "text.txt", 10)
assert result == "output"
def test_dump_vocab_flag(self, tmp_path: Path) -> None:
exe = tmp_path / "vocab_curve"
exe.write_text("", encoding="utf-8")
with (
patch("python_pkg.word_frequency._generation.C_EXECUTABLE", exe),
patch("python_pkg.word_frequency._generation.subprocess.run") as mock_run,
):
mock_run.return_value = MagicMock(stdout="output")
run_vocabulary_curve(tmp_path / "text.txt", 10, dump_vocab=True)
cmd = mock_run.call_args[0][0]
assert "--dump-vocab" in cmd
class TestRunVocabularyCurveInverse:
"""Tests for run_vocabulary_curve_inverse."""
def test_executable_not_found(self, tmp_path: Path) -> None:
with (
patch(
"python_pkg.word_frequency._generation.C_EXECUTABLE",
tmp_path / "nonexistent",
),
pytest.raises(FileNotFoundError, match="C executable not found"),
):
run_vocabulary_curve_inverse(tmp_path / "text.txt", 100)
def test_success(self, tmp_path: Path) -> None:
exe = tmp_path / "vocab_curve"
exe.write_text("", encoding="utf-8")
with (
patch("python_pkg.word_frequency._generation.C_EXECUTABLE", exe),
patch("python_pkg.word_frequency._generation.subprocess.run") as mock_run,
):
mock_run.return_value = MagicMock(stdout="output")
result = run_vocabulary_curve_inverse(tmp_path / "text.txt", 100)
assert result == "output"
def test_dump_vocab_flag(self, tmp_path: Path) -> None:
exe = tmp_path / "vocab_curve"
exe.write_text("", encoding="utf-8")
with (
patch("python_pkg.word_frequency._generation.C_EXECUTABLE", exe),
patch("python_pkg.word_frequency._generation.subprocess.run") as mock_run,
):
mock_run.return_value = MagicMock(stdout="output")
run_vocabulary_curve_inverse(tmp_path / "text.txt", 100, dump_vocab=True)
cmd = mock_run.call_args[0][0]
assert "--dump-vocab" in cmd
class TestCaching:
"""Tests for cache helper functions."""
def test_get_cached_excerpt_force(self) -> None:
result = get_cached_excerpt(Path("x.txt"), 10, force=True)
assert result is None
def test_get_cached_excerpt_delegates(self) -> None:
with patch(
"python_pkg.word_frequency._generation.get_vocab_curve_cache"
) as mock:
mock.return_value.get.return_value = ("ex", [("w", 1)])
result = get_cached_excerpt(Path("x.txt"), 10)
assert result == ("ex", [("w", 1)])
def test_cache_excerpt_delegates(self) -> None:
with patch(
"python_pkg.word_frequency._generation.get_vocab_curve_cache"
) as mock:
cache_excerpt(Path("x.txt"), 10, "ex", [("w", 1)])
mock.return_value.set.assert_called_once()
def test_get_cached_deck_force(self) -> None:
key = AnkiDeckKey(Path("x"), 10, "es", include_context=False, all_vocab=True)
result = get_cached_deck(key, force=True)
assert result is None
def test_get_cached_deck_delegates(self) -> None:
key = AnkiDeckKey(Path("x"), 10, "es", include_context=False, all_vocab=True)
with patch("python_pkg.word_frequency._generation.get_anki_deck_cache") as mock:
mock.return_value.get.return_value = ("c", "e", 2, 5)
result = get_cached_deck(key)
assert result == ("c", "e", 2, 5)
def test_cache_deck_delegates(self) -> None:
key = AnkiDeckKey(Path("x"), 10, "es", include_context=False, all_vocab=True)
with patch("python_pkg.word_frequency._generation.get_anki_deck_cache") as mock:
cache_deck(key, "content", "excerpt", 2, 5)
mock.return_value.set.assert_called_once()
class TestDetectSourceLanguage:
"""Tests for _detect_source_language."""
def test_detects_from_text(self) -> None:
with patch(
"python_pkg.word_frequency._generation.detect_language",
return_value="en",
):
result = _detect_source_language(Path("x"), "hello world")
assert result == "en"
def test_reads_file_when_text_empty(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello world", encoding="utf-8")
with patch(
"python_pkg.word_frequency._generation.detect_language",
return_value="en",
):
result = _detect_source_language(fp, "")
assert result == "en"
def test_raises_when_detection_fails(self) -> None:
with (
patch(
"python_pkg.word_frequency._generation.detect_language",
return_value=None,
),
pytest.raises(ValueError, match="Could not auto-detect"),
):
_detect_source_language(Path("x"), "hello world")
class TestGenerateFlashcards:
"""Tests for generate_flashcards."""
def test_cached_deck_returned(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello", encoding="utf-8")
with patch(
"python_pkg.word_frequency._generation.get_cached_deck",
return_value=("content", "excerpt", 5, 3),
):
result = generate_flashcards(fp, 10)
assert result == ("content", "excerpt", 5, 3)
def test_full_generation(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello world", encoding="utf-8")
vocab_output = """[Length 5] Vocab needed: 2
Excerpt: "hello world foo bar baz"
Words: hello(#1), world(#2)
VOCAB_DUMP_START
hello;1
world;2
foo;3
VOCAB_DUMP_END
"""
with (
patch(
"python_pkg.word_frequency._generation.get_cached_deck",
return_value=None,
),
patch(
"python_pkg.word_frequency._generation.run_vocabulary_curve",
return_value=vocab_output,
),
patch(
"python_pkg.word_frequency._generation.detect_language",
return_value="en",
),
patch(
"python_pkg.word_frequency._generation.generate_anki_deck",
return_value="deck content",
),
patch(
"python_pkg.word_frequency._generation.get_anki_deck_cache"
) as mock_cache,
):
content, excerpt, _, _ = generate_flashcards(
fp,
5,
FlashcardOptions(source_lang="en"),
)
assert content == "deck content"
assert excerpt == "hello world foo bar baz"
mock_cache.return_value.set.assert_called_once()
def test_no_words_raises(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello", encoding="utf-8")
with (
patch(
"python_pkg.word_frequency._generation.get_cached_deck",
return_value=None,
),
patch(
"python_pkg.word_frequency._generation.run_vocabulary_curve",
return_value="nothing useful",
),
patch(
"python_pkg.word_frequency._generation.detect_language",
return_value="en",
),
pytest.raises(ValueError, match="No words found"),
):
generate_flashcards(fp, 5, FlashcardOptions(source_lang="en"))
def test_no_translate_skips_cache(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello world", encoding="utf-8")
vocab_output = """[Length 5] Vocab needed: 2
Excerpt: "hello world foo bar baz"
Words: hello(#1), world(#2)
"""
with (
patch(
"python_pkg.word_frequency._generation.run_vocabulary_curve",
return_value=vocab_output,
),
patch(
"python_pkg.word_frequency._generation.generate_anki_deck",
return_value="deck",
),
patch(
"python_pkg.word_frequency._generation.get_anki_deck_cache"
) as mock_cache,
):
generate_flashcards(
fp,
5,
FlashcardOptions(source_lang="en", no_translate=True),
all_vocab=False,
)
mock_cache.return_value.set.assert_not_called()
def test_include_context(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello world foo bar baz", encoding="utf-8")
vocab_output = """[Length 5] Vocab needed: 2
Excerpt: "hello world foo bar baz"
Words: hello(#1), world(#2)
"""
with (
patch(
"python_pkg.word_frequency._generation.get_cached_deck",
return_value=None,
),
patch(
"python_pkg.word_frequency._generation.run_vocabulary_curve",
return_value=vocab_output,
),
patch(
"python_pkg.word_frequency._generation.generate_anki_deck",
return_value="deck",
),
patch("python_pkg.word_frequency._generation.get_anki_deck_cache"),
):
generate_flashcards(
fp,
5,
FlashcardOptions(
source_lang="en",
include_context=True,
no_translate=True,
),
all_vocab=False,
)
def test_auto_detect_language(self, tmp_path: Path) -> None:
fp = tmp_path / "t.txt"
fp.write_text("hello world", encoding="utf-8")
vocab_output = """[Length 5] Vocab needed: 2
Excerpt: "hello world foo bar baz"
Words: hello(#1), world(#2)
"""
with (
patch(
"python_pkg.word_frequency._generation.get_cached_deck",
return_value=None,
),
patch(
"python_pkg.word_frequency._generation.run_vocabulary_curve",
return_value=vocab_output,
),
patch(
"python_pkg.word_frequency._generation.detect_language",
return_value="en",
),
patch(
"python_pkg.word_frequency._generation.generate_anki_deck",
return_value="deck",
),
patch("python_pkg.word_frequency._generation.get_anki_deck_cache"),
):
content, _, _, _ = generate_flashcards(
fp, 5, FlashcardOptions(source_lang=None, no_translate=True)
)
assert content == "deck"
def test_include_context_empty_file(self, tmp_path: Path) -> None:
"""Cover the re-read path when initial read returns empty."""
fp = tmp_path / "t.txt"
fp.write_text("", encoding="utf-8")
vocab_output = """[Length 1] Vocab needed: 1
Excerpt: "hello"
Words: hello(#1)
"""
with (
patch(
"python_pkg.word_frequency._generation.get_cached_deck",
return_value=None,
),
patch(
"python_pkg.word_frequency._generation.run_vocabulary_curve",
return_value=vocab_output,
),
patch(
"python_pkg.word_frequency._generation.generate_anki_deck",
return_value="deck",
),
patch("python_pkg.word_frequency._generation.get_anki_deck_cache"),
):
generate_flashcards(
fp,
1,
FlashcardOptions(
source_lang="en",
include_context=True,
no_translate=True,
),
all_vocab=False,
)