mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 19:03:08 +02:00
Fix ruff violations in ~15 source files and ~60+ test files to minimize per-file-ignores in pyproject.toml. Remaining ignores are justified with comments explaining why each suppression is necessary.

Source fixes: FBT003 (keyword args), S310 (URL validation), SLF001 (private access), T201 (print→logging), C901 (complexity), E501 (line length), E402 (import order).

Test fixes: SIM117 (combined `with` statements), FBT (boolean args), PERF203 (try in loop), S310/S607 (URLs/executables), E402/E501 (imports/lines), S108 (tmp paths), PLR0913 (too many args), ARG (unused args), ANN (type annotations), RUF059 (unused unpacked vars), PT019 (fixture naming).

Remaining per-file-ignores (with justifications):
- Tests: ARG, D, PLC0415, PLR2004, S101, SLF001
- music_gen sources: PLC0415 (heavy ML lazy imports)
- moviepy_showcase: PLC0415 (circular dependency)
- generate_images: PLR0913 (matplotlib helpers need many params)
- praca_magisterska_video: E501, E402 (long paths, mpl.use)
229 lines
6.5 KiB
Python
229 lines
6.5 KiB
Python
"""Tests for word_frequency._parsing module."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from python_pkg.word_frequency._parsing import (
|
|
_parse_excerpt_lines,
|
|
_parse_target_length_block,
|
|
_parse_vocab_dump,
|
|
parse_inverse_mode_output,
|
|
parse_vocabulary_curve_output,
|
|
)
|
|
|
|
|
|
class TestParseVocabDump:
    """Check _parse_vocab_dump on well-formed and malformed dump sections."""

    def test_parses_vocab(self) -> None:
        """Entries between the start/end markers come back as (word, rank)."""
        dump = ["VOCAB_DUMP_START", "hello;1", "world;2", "VOCAB_DUMP_END"]
        assert _parse_vocab_dump(dump) == [("hello", 1), ("world", 2)]

    def test_no_dump_section(self) -> None:
        """Input without any dump markers yields an empty list."""
        unrelated = ["some random output", "more stuff"]
        assert _parse_vocab_dump(unrelated) == []

    def test_invalid_rank(self) -> None:
        """An entry whose rank is not an integer is dropped; the rest survive."""
        dump = [
            "VOCAB_DUMP_START",
            "hello;notanumber",
            "world;2",
            "VOCAB_DUMP_END",
        ]
        assert _parse_vocab_dump(dump) == [("world", 2)]

    def test_wrong_parts_count(self) -> None:
        """An entry with more than two ';'-separated fields is dropped."""
        dump = [
            "VOCAB_DUMP_START",
            "hello;1;extra",
            "world;2",
            "VOCAB_DUMP_END",
        ]
        assert _parse_vocab_dump(dump) == [("world", 2)]

    def test_line_without_semicolon(self) -> None:
        """A line inside the dump that has no ';' separator is dropped."""
        dump = [
            "VOCAB_DUMP_START",
            "no semicolon here",
            "world;2",
            "VOCAB_DUMP_END",
        ]
        assert _parse_vocab_dump(dump) == [("world", 2)]
|
|
|
|
|
|
class TestParseExcerptLines:
    """Check _parse_excerpt_lines on quoted excerpts.

    Every case passes 0 as the second argument (presumably the index of the
    line where the excerpt starts -- confirm against _parse_excerpt_lines).
    """

    def test_single_line_with_quotes(self) -> None:
        """A one-line excerpt is returned without its surrounding quotes."""
        excerpt = _parse_excerpt_lines(['"hello world"'], 0)
        assert excerpt == "hello world"

    def test_multi_line(self) -> None:
        """An excerpt split over two lines is returned joined by one space."""
        excerpt = _parse_excerpt_lines(['"hello', 'world"'], 0)
        assert excerpt == "hello world"

    def test_with_leading_quote(self) -> None:
        """The excerpt text is present in the result for a quoted line."""
        excerpt = _parse_excerpt_lines(['"hello world"'], 0)
        assert "hello world" in excerpt

    def test_no_ending_quote(self) -> None:
        """The excerpt text is still present when the closing quote is missing."""
        excerpt = _parse_excerpt_lines(['"hello world'], 0)
        assert "hello world" in excerpt
|
|
|
|
|
|
class TestParseInverseModeOutput:
    """Check parse_inverse_mode_output, which returns a 4-tuple.

    The tuple is unpacked below as (excerpt, length, max_rank, vocab).
    """

    def test_full_output(self) -> None:
        """A complete report populates all four returned fields."""
        report = """LONGEST EXCERPT: 5 words using top 10 vocabulary
Excerpt:
"hello world foo bar baz"
Rarest word used: baz (#5)

VOCAB_DUMP_START
hello;1
world;2
VOCAB_DUMP_END
"""
        text, word_count, rarest_rank, vocab_pairs = parse_inverse_mode_output(
            report
        )
        assert word_count == 5
        assert text == "hello world foo bar baz"
        assert rarest_rank == 5
        assert vocab_pairs == [("hello", 1), ("world", 2)]

    def test_no_rarest_word(self) -> None:
        """Without a 'Rarest word used' line the max rank is 0."""
        report = """LONGEST EXCERPT: 3 words
Excerpt:
"hello world foo"
"""
        _, word_count, rarest_rank, _ = parse_inverse_mode_output(report)
        assert word_count == 3
        assert rarest_rank == 0

    def test_empty_output(self) -> None:
        """Empty input yields an empty excerpt, zero counts and no vocab."""
        text, word_count, rarest_rank, vocab_pairs = parse_inverse_mode_output("")
        assert text == ""
        assert word_count == 0
        assert rarest_rank == 0
        assert vocab_pairs == []

    def test_short_longest_excerpt_line(self) -> None:
        """A header carrying only the number 0 gives a length of 0."""
        _, word_count, _, _ = parse_inverse_mode_output("LONGEST EXCERPT: 0")
        assert word_count == 0

    def test_too_few_parts_in_longest_excerpt(self) -> None:
        """A header with nothing after the colon gives a length of 0."""
        _, word_count, _, _ = parse_inverse_mode_output("LONGEST EXCERPT:")
        assert word_count == 0

    def test_rarest_word_without_hash_number(self) -> None:
        """A rarest-word line lacking a '(#N)' rank leaves the max rank at 0."""
        _, _, rarest_rank, _ = parse_inverse_mode_output("Rarest word used: unknown")
        assert rarest_rank == 0
|
|
|
|
|
|
class TestParseTargetLengthBlock:
    """Check _parse_target_length_block, which returns (excerpt, words).

    The second argument is the length searched for in the '[Length N]' header.
    """

    def test_parses_block(self) -> None:
        """A matching block yields its excerpt and its (word, rank) pairs."""
        block = [
            "[Length 3] Vocab needed: 2",
            ' Excerpt: "hello world foo"',
            " Words: hello(#1), world(#2)",
        ]
        text, ranked = _parse_target_length_block(block, 3)
        assert text == "hello world foo"
        assert ("hello", 1) in ranked
        assert ("world", 2) in ranked

    def test_no_matching_length(self) -> None:
        """When no header matches the requested length, both results are empty."""
        block = [
            "[Length 5] Vocab needed: 2",
            ' Excerpt: "hello"',
            " Words: hello(#1)",
        ]
        text, ranked = _parse_target_length_block(block, 999)
        assert text == ""
        assert ranked == []

    def test_no_excerpt_line(self) -> None:
        """A block that has no 'Excerpt:' line yields an empty excerpt."""
        block = [
            "[Length 3] Vocab needed: 2",
            " Words: hello(#1)",
        ]
        text, _ = _parse_target_length_block(block, 3)
        assert text == ""

    def test_no_words_line(self) -> None:
        """A block that has no 'Words:' line yields an empty word list."""
        block = [
            "[Length 3] Vocab needed: 2",
            ' Excerpt: "hello world"',
        ]
        text, ranked = _parse_target_length_block(block, 3)
        assert text == "hello world"
        assert ranked == []

    def test_excerpt_without_quotes(self) -> None:
        """An unquoted excerpt is reported as empty; words are still parsed."""
        block = [
            "[Length 3] Vocab needed: 2",
            " Excerpt: hello world",
            " Words: hello(#1)",
        ]
        text, ranked = _parse_target_length_block(block, 3)
        assert text == ""
        assert ("hello", 1) in ranked

    def test_excerpt_found_but_no_words_before_eof(self) -> None:
        """An excerpt followed only by an unrelated line gives no words."""
        block = [
            "[Length 3] Vocab needed: 2",
            ' Excerpt: "hello"',
            " some random line",
        ]
        text, ranked = _parse_target_length_block(block, 3)
        assert text == "hello"
        assert ranked == []
|
|
|
|
|
|
class TestParseVocabularyCurveOutput:
    """Check parse_vocabulary_curve_output, which returns a 3-tuple.

    The tuple is unpacked below as (excerpt, words, all_vocab).
    """

    def test_with_vocab_dump(self) -> None:
        """A report with a dump section also returns the full vocabulary."""
        report = """[Length 2] Vocab needed: 2
Excerpt: "hello world"
Words: hello(#1), world(#2)

VOCAB_DUMP_START
hello;1
world;2
foo;3
VOCAB_DUMP_END
"""
        text, ranked, full_vocab = parse_vocabulary_curve_output(report, 2)
        assert text == "hello world"
        assert len(ranked) == 2
        assert len(full_vocab) == 3

    def test_without_vocab_dump(self) -> None:
        """A report without a dump section returns an empty vocabulary list."""
        report = """[Length 2] Vocab needed: 2
Excerpt: "hello world"
Words: hello(#1), world(#2)
"""
        text, ranked, full_vocab = parse_vocabulary_curve_output(report, 2)
        assert text == "hello world"
        assert len(ranked) == 2
        assert full_vocab == []
|