mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 17:43:05 +02:00
Fix ruff violations in ~15 source files and ~60+ test files to minimize per-file-ignores in pyproject.toml. Remaining ignores are justified with comments explaining why each suppression is necessary. Source fixes: FBT003 (keyword args), S310 (URL validation), SLF001 (private access), T201 (print→logging), C901 (complexity), E501 (line length), E402 (import order). Test fixes: SIM117 (combined with), FBT (boolean args), PERF203 (try in loop), S310/S607 (URLs/executables), E402/E501 (imports/lines), S108 (tmp paths), PLR0913 (too many args), ARG (unused args), ANN (type annotations), RUF059 (unused unpacked vars), PT019 (fixture naming). Remaining per-file-ignores (with justifications): - Tests: ARG, D, PLC0415, PLR2004, S101, SLF001 - music_gen sources: PLC0415 (heavy ML lazy imports) - moviepy_showcase: PLC0415 (circular dependency) - generate_images: PLR0913 (matplotlib helpers need many params) - praca_magisterska_video: E501, E402 (long paths, mpl.use)
372 lines
13 KiB
Python
372 lines
13 KiB
Python
"""Tests for word_frequency._generation module."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from python_pkg.word_frequency._generation import (
|
|
_detect_source_language,
|
|
cache_deck,
|
|
cache_excerpt,
|
|
generate_flashcards,
|
|
get_cached_deck,
|
|
get_cached_excerpt,
|
|
run_vocabulary_curve,
|
|
run_vocabulary_curve_inverse,
|
|
)
|
|
from python_pkg.word_frequency._types import FlashcardOptions
|
|
from python_pkg.word_frequency.cache import AnkiDeckKey
|
|
|
|
|
|
class TestRunVocabularyCurve:
    """Exercise run_vocabulary_curve with C_EXECUTABLE and subprocess.run patched."""

    # Patch targets inside the module under test.
    _EXE_TARGET = "python_pkg.word_frequency._generation.C_EXECUTABLE"
    _RUN_TARGET = "python_pkg.word_frequency._generation.subprocess.run"

    def test_executable_not_found(self, tmp_path: Path) -> None:
        """A C_EXECUTABLE path that does not exist raises FileNotFoundError."""
        missing_exe = tmp_path / "nonexistent"
        text_file = tmp_path / "text.txt"
        with (
            patch(self._EXE_TARGET, missing_exe),
            pytest.raises(FileNotFoundError, match="C executable not found"),
        ):
            run_vocabulary_curve(text_file, 10)

    def test_success(self, tmp_path: Path) -> None:
        """The stdout of the patched subprocess result is returned."""
        fake_exe = tmp_path / "vocab_curve"
        fake_exe.write_text("", encoding="utf-8")
        completed = MagicMock(stdout="output")
        with (
            patch(self._EXE_TARGET, fake_exe),
            patch(self._RUN_TARGET, return_value=completed),
        ):
            produced = run_vocabulary_curve(tmp_path / "text.txt", 10)
        assert produced == "output"

    def test_dump_vocab_flag(self, tmp_path: Path) -> None:
        """dump_vocab=True puts --dump-vocab into the command passed to run."""
        fake_exe = tmp_path / "vocab_curve"
        fake_exe.write_text("", encoding="utf-8")
        completed = MagicMock(stdout="output")
        with (
            patch(self._EXE_TARGET, fake_exe),
            patch(self._RUN_TARGET, return_value=completed) as run_mock,
        ):
            run_vocabulary_curve(tmp_path / "text.txt", 10, dump_vocab=True)
        # The command is the first positional argument given to subprocess.run.
        argv = run_mock.call_args.args[0]
        assert "--dump-vocab" in argv
|
|
|
|
|
|
class TestRunVocabularyCurveInverse:
    """Exercise run_vocabulary_curve_inverse with its external pieces patched."""

    # Patch targets inside the module under test.
    _EXE_TARGET = "python_pkg.word_frequency._generation.C_EXECUTABLE"
    _RUN_TARGET = "python_pkg.word_frequency._generation.subprocess.run"

    def test_executable_not_found(self, tmp_path: Path) -> None:
        """A C_EXECUTABLE path that does not exist raises FileNotFoundError."""
        missing_exe = tmp_path / "nonexistent"
        text_file = tmp_path / "text.txt"
        with (
            patch(self._EXE_TARGET, missing_exe),
            pytest.raises(FileNotFoundError, match="C executable not found"),
        ):
            run_vocabulary_curve_inverse(text_file, 100)

    def test_success(self, tmp_path: Path) -> None:
        """The stdout of the patched subprocess result is returned."""
        fake_exe = tmp_path / "vocab_curve"
        fake_exe.write_text("", encoding="utf-8")
        completed = MagicMock(stdout="output")
        with (
            patch(self._EXE_TARGET, fake_exe),
            patch(self._RUN_TARGET, return_value=completed),
        ):
            produced = run_vocabulary_curve_inverse(tmp_path / "text.txt", 100)
        assert produced == "output"

    def test_dump_vocab_flag(self, tmp_path: Path) -> None:
        """dump_vocab=True puts --dump-vocab into the command passed to run."""
        fake_exe = tmp_path / "vocab_curve"
        fake_exe.write_text("", encoding="utf-8")
        completed = MagicMock(stdout="output")
        with (
            patch(self._EXE_TARGET, fake_exe),
            patch(self._RUN_TARGET, return_value=completed) as run_mock,
        ):
            run_vocabulary_curve_inverse(tmp_path / "text.txt", 100, dump_vocab=True)
        # The command is the first positional argument given to subprocess.run.
        argv = run_mock.call_args.args[0]
        assert "--dump-vocab" in argv
|
|
|
|
|
|
class TestCaching:
    """Check the excerpt and deck cache helpers against patched cache getters."""

    # Patch targets inside the module under test.
    _VOCAB_CACHE = "python_pkg.word_frequency._generation.get_vocab_curve_cache"
    _DECK_CACHE = "python_pkg.word_frequency._generation.get_anki_deck_cache"

    @staticmethod
    def _deck_key() -> AnkiDeckKey:
        """Build the deck key used by every deck-cache test in this class."""
        return AnkiDeckKey(Path("x"), 10, "es", include_context=False, all_vocab=True)

    def test_get_cached_excerpt_force(self) -> None:
        """force=True makes get_cached_excerpt return None."""
        assert get_cached_excerpt(Path("x.txt"), 10, force=True) is None

    def test_get_cached_excerpt_delegates(self) -> None:
        """Without force, the value from the cache object's get() is returned."""
        with patch(self._VOCAB_CACHE) as cache_getter:
            cache_getter.return_value.get.return_value = ("ex", [("w", 1)])
            found = get_cached_excerpt(Path("x.txt"), 10)
        assert found == ("ex", [("w", 1)])

    def test_cache_excerpt_delegates(self) -> None:
        """cache_excerpt calls set() on the cache object exactly once."""
        with patch(self._VOCAB_CACHE) as cache_getter:
            cache_excerpt(Path("x.txt"), 10, "ex", [("w", 1)])
        cache_getter.return_value.set.assert_called_once()

    def test_get_cached_deck_force(self) -> None:
        """force=True makes get_cached_deck return None."""
        assert get_cached_deck(self._deck_key(), force=True) is None

    def test_get_cached_deck_delegates(self) -> None:
        """Without force, the value from the deck cache's get() is returned."""
        deck_key = self._deck_key()
        with patch(self._DECK_CACHE) as cache_getter:
            cache_getter.return_value.get.return_value = ("c", "e", 2, 5)
            found = get_cached_deck(deck_key)
        assert found == ("c", "e", 2, 5)

    def test_cache_deck_delegates(self) -> None:
        """cache_deck calls set() on the deck cache object exactly once."""
        deck_key = self._deck_key()
        with patch(self._DECK_CACHE) as cache_getter:
            cache_deck(deck_key, "content", "excerpt", 2, 5)
        cache_getter.return_value.set.assert_called_once()
|
|
|
|
|
|
class TestDetectSourceLanguage:
    """Check _detect_source_language with detect_language patched."""

    # Patch target inside the module under test.
    _DETECT = "python_pkg.word_frequency._generation.detect_language"

    def test_detects_from_text(self) -> None:
        """The patched detector's answer is returned when text is supplied."""
        with patch(self._DETECT, return_value="en"):
            language = _detect_source_language(Path("x"), "hello world")
        assert language == "en"

    def test_reads_file_when_text_empty(self, tmp_path: Path) -> None:
        """An empty text argument with a real file on disk still yields a language."""
        source = tmp_path / "t.txt"
        source.write_text("hello world", encoding="utf-8")
        with patch(self._DETECT, return_value="en"):
            language = _detect_source_language(source, "")
        assert language == "en"

    def test_raises_when_detection_fails(self) -> None:
        """A detector returning None leads to a ValueError."""
        with (
            patch(self._DETECT, return_value=None),
            pytest.raises(ValueError, match="Could not auto-detect"),
        ):
            _detect_source_language(Path("x"), "hello world")
|
|
|
|
|
|
class TestGenerateFlashcards:
    """Drive generate_flashcards with its collaborators replaced by mocks."""

    # Patch targets inside the module under test.
    _CACHED_DECK = "python_pkg.word_frequency._generation.get_cached_deck"
    _RUN_CURVE = "python_pkg.word_frequency._generation.run_vocabulary_curve"
    _DETECT = "python_pkg.word_frequency._generation.detect_language"
    _MAKE_DECK = "python_pkg.word_frequency._generation.generate_anki_deck"
    _DECK_CACHE = "python_pkg.word_frequency._generation.get_anki_deck_cache"

    # Canned return values for the patched run_vocabulary_curve.
    _CURVE_OUTPUT = (
        "[Length 5] Vocab needed: 2\n"
        'Excerpt: "hello world foo bar baz"\n'
        "Words: hello(#1), world(#2)\n"
    )
    _CURVE_OUTPUT_WITH_DUMP = (
        "[Length 5] Vocab needed: 2\n"
        'Excerpt: "hello world foo bar baz"\n'
        "Words: hello(#1), world(#2)\n"
        "\n"
        "VOCAB_DUMP_START\n"
        "hello;1\n"
        "world;2\n"
        "foo;3\n"
        "VOCAB_DUMP_END\n"
    )
    _SINGLE_WORD_OUTPUT = (
        "[Length 1] Vocab needed: 1\n"
        'Excerpt: "hello"\n'
        "Words: hello(#1)\n"
    )

    @staticmethod
    def _text_file(directory: Path, body: str) -> Path:
        """Write body to t.txt inside directory and return the file path."""
        target = directory / "t.txt"
        target.write_text(body, encoding="utf-8")
        return target

    def test_cached_deck_returned(self, tmp_path: Path) -> None:
        """A value from get_cached_deck is returned unchanged."""
        source = self._text_file(tmp_path, "hello")
        with patch(self._CACHED_DECK, return_value=("content", "excerpt", 5, 3)):
            outcome = generate_flashcards(source, 10)
        assert outcome == ("content", "excerpt", 5, 3)

    def test_full_generation(self, tmp_path: Path) -> None:
        """With no cached deck, the deck is generated and stored in the cache."""
        source = self._text_file(tmp_path, "hello world")
        with (
            patch(self._CACHED_DECK, return_value=None),
            patch(self._RUN_CURVE, return_value=self._CURVE_OUTPUT_WITH_DUMP),
            patch(self._DETECT, return_value="en"),
            patch(self._MAKE_DECK, return_value="deck content"),
            patch(self._DECK_CACHE) as cache_getter,
        ):
            deck_text, excerpt_text, _, _ = generate_flashcards(
                source,
                5,
                FlashcardOptions(source_lang="en"),
            )
        assert deck_text == "deck content"
        assert excerpt_text == "hello world foo bar baz"
        cache_getter.return_value.set.assert_called_once()

    def test_no_words_raises(self, tmp_path: Path) -> None:
        """Curve output without any parsable words raises ValueError."""
        source = self._text_file(tmp_path, "hello")
        options = FlashcardOptions(source_lang="en")
        with (
            patch(self._CACHED_DECK, return_value=None),
            patch(self._RUN_CURVE, return_value="nothing useful"),
            patch(self._DETECT, return_value="en"),
            pytest.raises(ValueError, match="No words found"),
        ):
            generate_flashcards(source, 5, options)

    def test_no_translate_skips_cache(self, tmp_path: Path) -> None:
        """With no_translate=True the deck cache's set() is never called."""
        source = self._text_file(tmp_path, "hello world")
        with (
            patch(self._RUN_CURVE, return_value=self._CURVE_OUTPUT),
            patch(self._MAKE_DECK, return_value="deck"),
            patch(self._DECK_CACHE) as cache_getter,
        ):
            generate_flashcards(
                source,
                5,
                FlashcardOptions(source_lang="en", no_translate=True),
                all_vocab=False,
            )
        cache_getter.return_value.set.assert_not_called()

    def test_include_context(self, tmp_path: Path) -> None:
        """Run generation with include_context=True; only checks it does not raise."""
        source = self._text_file(tmp_path, "hello world foo bar baz")
        options = FlashcardOptions(
            source_lang="en",
            include_context=True,
            no_translate=True,
        )
        with (
            patch(self._CACHED_DECK, return_value=None),
            patch(self._RUN_CURVE, return_value=self._CURVE_OUTPUT),
            patch(self._MAKE_DECK, return_value="deck"),
            patch(self._DECK_CACHE),
        ):
            generate_flashcards(source, 5, options, all_vocab=False)

    def test_auto_detect_language(self, tmp_path: Path) -> None:
        """source_lang=None with detect_language patched still yields the deck."""
        source = self._text_file(tmp_path, "hello world")
        options = FlashcardOptions(source_lang=None, no_translate=True)
        with (
            patch(self._CACHED_DECK, return_value=None),
            patch(self._RUN_CURVE, return_value=self._CURVE_OUTPUT),
            patch(self._DETECT, return_value="en"),
            patch(self._MAKE_DECK, return_value="deck"),
            patch(self._DECK_CACHE),
        ):
            deck_text, _, _, _ = generate_flashcards(source, 5, options)
        assert deck_text == "deck"

    def test_include_context_empty_file(self, tmp_path: Path) -> None:
        """Cover the re-read path when initial read returns empty."""
        source = self._text_file(tmp_path, "")
        options = FlashcardOptions(
            source_lang="en",
            include_context=True,
            no_translate=True,
        )
        with (
            patch(self._CACHED_DECK, return_value=None),
            patch(self._RUN_CURVE, return_value=self._SINGLE_WORD_OUTPUT),
            patch(self._MAKE_DECK, return_value="deck"),
            patch(self._DECK_CACHE),
        ):
            generate_flashcards(source, 1, options, all_vocab=False)
|