testsAndMisc-archive/python_pkg/extract_links/tests/test_main.py
Krzysztof kuhy Rudnicki 22333931cc refactor: rename folders to fix N999, INP001, S311 linting rules
- Rename PYTHON/ to python_pkg/ (fix N999 uppercase folder)
- Rename camelCase folders to snake_case:
  - randomJPG -> random_jpg
  - tagDivider -> tag_divider
  - downloadCats -> download_cats
  - keyboardCoop -> keyboard_coop
  - extractLinks -> extract_links
  - scapeWebsite -> scrape_website
- Rename camelCase files:
  - generateJpeg.py -> generate_jpeg.py
  - tagDivider.py -> tag_divider.py
- Rename poker-modifier-app to poker_modifier_app (fix INP001)
- Add __init__.py to poker_modifier_app
- Replace random module with secrets.SystemRandom (fix S311)
- Fix S110 try-except-pass with contextlib.suppress
- Update all imports and config references
2025-11-30 21:20:17 +01:00

74 lines
2.3 KiB
Python

"""Unit tests for link extraction functionality."""
from pathlib import Path
import subprocess
import sys
# Put the directory one level above this tests folder on sys.path so that
# `main` can be imported even when pytest is started from inside tests/.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
# Script that the CLI tests below execute in a subprocess.
SCRIPT = ROOT.joinpath("main.py")
def read_lines(p: Path) -> list[str]:
    """Return the lines of a UTF-8 text file without their line endings.

    Args:
        p: Path of the file to read.

    Returns:
        One string per line, in file order; an empty file gives ``[]``.
    """
    # str.splitlines() already drops the line terminators, so no further
    # per-line stripping is required.
    return p.read_text(encoding="utf-8").splitlines()
def test_extract_hosts_function() -> None:
    """Check extract_hosts_from_html returns each host once, in first-seen order."""
    # Imported here rather than at module top: the sys.path tweak done at
    # module level has to run before `main` can be resolved.
    from main import extract_hosts_from_html

    anchors = [
        '<a href="https://wiby.me/">A</a>',
        '<a href="http://example.com/page">B</a>',
        # Fragment-only and mailto links are absent from the expected hosts.
        '<a href="#local">C</a>',
        '<a href="mailto:foo@bar.com">D</a>',
        # Second wiby.me link: the expected list names that host only once.
        '<a href="https://wiby.me/about">E</a>',
    ]
    html = "".join(anchors)
    expected = ["wiby.me", "example.com"]

    hosts = extract_hosts_from_html(html)

    assert hosts == expected, hosts
def test_cli_writes_expected_output(tmp_path: Path) -> None:
    """Run the CLI with an explicit output path and check the written file."""
    # Copy the sample1.html fixture into tmp_path and run the script on it.
    fixture = ROOT / "tests" / "sample1.html"
    html_copy = tmp_path / "sample1.html"
    fixture_text = fixture.read_text(encoding="utf-8")
    html_copy.write_text(fixture_text, encoding="utf-8")

    out_file = tmp_path / "out.txt"
    command = [sys.executable, str(SCRIPT), str(html_copy), str(out_file)]
    # check=True turns a non-zero exit status into CalledProcessError.
    subprocess.run(command, capture_output=True, text=True, check=True)

    assert out_file.exists()
    lines = read_lines(out_file)
    # Hosts are expected in first-seen order: wiby.me, then example.com.
    expected = ["*wiby.me*", "*example.com*"]
    assert lines == expected, lines
def test_cli_default_output_name(tmp_path: Path) -> None:
    """Run the CLI without an output argument and check the default file name."""
    fixture = ROOT / "tests" / "sample2.html"
    html_copy = tmp_path / "sample2.html"
    fixture_text = fixture.read_text(encoding="utf-8")
    html_copy.write_text(fixture_text, encoding="utf-8")

    # Only the input path is passed, so the script must pick the output name.
    command = [sys.executable, str(SCRIPT), str(html_copy)]
    subprocess.run(command, capture_output=True, text=True, check=True)

    # The result is expected next to the input copy as "<stem>_links.txt".
    default_out = tmp_path / "sample2_links.txt"
    assert default_out.exists()
    lines = read_lines(default_out)
    expected = ["*sub.domain.co.uk*", "*example.com:8080*"]
    assert lines == expected, lines