mirror of
https://github.com/kuhyx/testsAndMisc-archive.git
synced 2026-07-04 19:43:14 +02:00
- Rename PYTHON/ to python_pkg/ (fix N999 uppercase folder)
- Rename camelCase folders to snake_case:
  - randomJPG -> random_jpg
  - tagDivider -> tag_divider
  - downloadCats -> download_cats
  - keyboardCoop -> keyboard_coop
  - extractLinks -> extract_links
  - scapeWebsite -> scrape_website
- Rename camelCase files:
  - generateJpeg.py -> generate_jpeg.py
  - tagDivider.py -> tag_divider.py
- Rename poker-modifier-app to poker_modifier_app (fix INP001)
- Add __init__.py to poker_modifier_app
- Replace random module with secrets.SystemRandom (fix S311)
- Fix S110 try-except-pass with contextlib.suppress
- Update all imports and config references
74 lines
2.3 KiB
Python
74 lines
2.3 KiB
Python
"""Unit tests for link extraction functionality."""
|
|
|
|
from pathlib import Path
|
|
import subprocess
|
|
import sys
|
|
|
|
# ROOT is the parent of the directory containing this file. It is put at the
# front of sys.path (once) so that `main` can be imported when pytest is
# started from this folder.
ROOT = Path(__file__).resolve().parents[1]
SCRIPT = ROOT.joinpath("main.py")
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
def read_lines(p: Path) -> list[str]:
    """Read a UTF-8 text file and return its lines without line terminators.

    Args:
        p: Path of the file to read.

    Returns:
        The file's lines in order; an empty file yields an empty list.
    """
    # str.splitlines() already drops the line terminators, so no additional
    # per-line stripping is needed.
    return p.read_text(encoding="utf-8").splitlines()
|
|
|
|
|
|
def test_extract_hosts_function() -> None:
    """Check that extract_hosts_from_html returns each host once, in first-seen order.

    The input repeats the wiby.me host and also contains a fragment-only link
    and a mailto link; only the two http(s) hosts are expected in the result.
    """
    # Imported lazily: `main` only becomes importable after the module-level
    # sys.path adjustment has run.
    from main import extract_hosts_from_html

    anchors = [
        '<a href="https://wiby.me/">A</a>',
        '<a href="http://example.com/page">B</a>',
        '<a href="#local">C</a>',
        '<a href="mailto:foo@bar.com">D</a>',
        '<a href="https://wiby.me/about">E</a>',
    ]
    found = extract_hosts_from_html("".join(anchors))
    expected = ["wiby.me", "example.com"]
    assert found == expected, found
|
|
|
|
|
|
def test_cli_writes_expected_output(tmp_path: Path) -> None:
    """Check that the CLI writes the expected lines to an explicit output path.

    The sample page is staged in the temporary directory, the script is run
    with both an input and an output argument, and the output file must then
    contain one `*host*` line per host.
    """
    # Stage a copy of sample1.html inside the pytest-provided temp directory.
    sample_text = (ROOT / "tests" / "sample1.html").read_text(encoding="utf-8")
    staged_input = tmp_path / "sample1.html"
    staged_input.write_text(sample_text, encoding="utf-8")

    # Invoke the script with the current interpreter; check=True makes a
    # non-zero exit status raise and fail the test.
    destination = tmp_path / "out.txt"
    command = [sys.executable, str(SCRIPT), str(staged_input), str(destination)]
    subprocess.run(command, capture_output=True, text=True, check=True)
    assert destination.exists()

    written = read_lines(destination)
    # Hosts appear in first-seen order: wiby.me, then example.com.
    assert written == ["*wiby.me*", "*example.com*"], written
|
|
|
|
|
|
def test_cli_default_output_name(tmp_path: Path) -> None:
    """Check that the CLI picks a default output file when none is given.

    With only the input path supplied, the script is expected to create
    `sample2_links.txt` next to the staged `sample2.html`.
    """
    # Stage a copy of sample2.html inside the pytest-provided temp directory.
    sample_text = (ROOT / "tests" / "sample2.html").read_text(encoding="utf-8")
    staged_input = tmp_path / "sample2.html"
    staged_input.write_text(sample_text, encoding="utf-8")

    # Only the input argument is passed; check=True makes a non-zero exit
    # status raise and fail the test.
    command = [sys.executable, str(SCRIPT), str(staged_input)]
    subprocess.run(command, capture_output=True, text=True, check=True)

    expected_file = tmp_path / "sample2_links.txt"
    assert expected_file.exists()

    written = read_lines(expected_file)
    assert written == ["*sub.domain.co.uk*", "*example.com:8080*"], written
|