mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 14:43:01 +02:00
chore: consolidate root configs into meta/, drop unused C dir + split/pdfCentered/geo_data
- Move pyproject.toml, .pre-commit-config.yaml, requirements.txt, run.sh,
lint_python.sh, .fvmrc into meta/ with root symlinks preserving tool
auto-discovery.
- Combine requirements.txt + requirements-dev.txt into meta/requirements.txt
(single sorted source of truth).
- Remove setup.sh, .binary-allowlist, C/ (no native code remains),
python_pkg/{split,pdfCentered,geo_data}, scripts/check_c_cpp_build_files.sh.
- Drop clang-format/cppcheck/flawfinder/check-c-cpp-build-files hooks and
archived path excludes from pre-commit config.
- Add .secret-patterns to .gitignore and untrack it (sensitive content;
full history purge is a follow-up step).
This commit is contained in:
parent
84e5d39137
commit
89b4f59ce9
@ -1,3 +0,0 @@
|
|||||||
# Binary files allowed in the repository.
|
|
||||||
# One glob pattern per line. These are essential for builds and cannot be external.
|
|
||||||
# Lines starting with # are comments.
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,5 +1,8 @@
|
|||||||
# See http://help.github.com/ignore-files/ for more about ignoring files.
|
# See http://help.github.com/ignore-files/ for more about ignoring files.
|
||||||
|
|
||||||
|
# Sensitive — must NEVER be committed (contains regex of home GPS coordinates etc.)
|
||||||
|
.secret-patterns
|
||||||
|
|
||||||
# Compiled output
|
# Compiled output
|
||||||
/dist
|
/dist
|
||||||
/tmp
|
/tmp
|
||||||
|
|||||||
@ -1,449 +0,0 @@
|
|||||||
# ==============================================================================
|
|
||||||
# Pre-commit Configuration - Multi-language Linting & Formatting
|
|
||||||
# ==============================================================================
|
|
||||||
# Install: pre-commit install && pre-commit install --hook-type pre-push
|
|
||||||
# Fast lint: pre-commit run --all-files (linters only, ~10 s)
|
|
||||||
# Full suite: pre-commit run --all-files --hook-stage pre-push (+ tests)
|
|
||||||
# Update hooks: pre-commit autoupdate
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Global settings
|
|
||||||
default_language_version:
|
|
||||||
python: python3
|
|
||||||
|
|
||||||
# Run every hook even after one fails so all errors are reported (set to true to stop at the first failure)
|
|
||||||
fail_fast: false
|
|
||||||
|
|
||||||
# pre-commit.ci settings (commit messages used for auto-fix and hook-update commits)
|
|
||||||
ci:
|
|
||||||
autofix_commit_msg: "style: auto-fix by pre-commit hooks"
|
|
||||||
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
|
||||||
|
|
||||||
repos:
|
|
||||||
# ===========================================================================
|
|
||||||
# GENERAL HOOKS - File formatting and validation
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
||||||
rev: v4.6.0
|
|
||||||
hooks:
|
|
||||||
- id: trailing-whitespace
|
|
||||||
args: [--markdown-linebreak-ext=md]
|
|
||||||
- id: end-of-file-fixer
|
|
||||||
- id: check-yaml
|
|
||||||
args: [--unsafe]
|
|
||||||
- id: check-json
|
|
||||||
# Exclude JSONC files (VS Code configs, TypeScript configs) and compile_commands.json
|
|
||||||
exclude: ^(\.vscode/|.*/\.vscode/|C/compile_commands\.json|.*tsconfig.*\.json)
|
|
||||||
- id: check-toml
|
|
||||||
- id: check-xml
|
|
||||||
- id: check-added-large-files
|
|
||||||
args: [--maxkb=2000]
|
|
||||||
- id: check-merge-conflict
|
|
||||||
- id: check-case-conflict
|
|
||||||
- id: check-symlinks
|
|
||||||
- id: check-executables-have-shebangs
|
|
||||||
- id: check-shebang-scripts-are-executable
|
|
||||||
- id: detect-private-key
|
|
||||||
- id: debug-statements
|
|
||||||
- id: name-tests-test
|
|
||||||
args: [--pytest-test-first]
|
|
||||||
- id: check-ast
|
|
||||||
- id: check-builtin-literals
|
|
||||||
- id: check-docstring-first
|
|
||||||
- id: fix-byte-order-marker
|
|
||||||
- id: mixed-line-ending
|
|
||||||
args: [--fix=lf]
|
|
||||||
- id: requirements-txt-fixer
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# BINARY BLOCKER - Prevent binary/image files from being committed
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: no-binaries
|
|
||||||
name: Block binary/image files
|
|
||||||
entry: scripts/check_no_binaries.sh
|
|
||||||
language: script
|
|
||||||
always_run: false
|
|
||||||
- id: ai-evidence-contract
|
|
||||||
name: Require AI evidence artifacts for code changes
|
|
||||||
entry: scripts/check_ai_evidence.sh
|
|
||||||
language: script
|
|
||||||
pass_filenames: false
|
|
||||||
always_run: true
|
|
||||||
- id: ai-multifile-contract
|
|
||||||
name: Require workflow contract for multi-file code changes
|
|
||||||
entry: scripts/check_agent_contract.sh
|
|
||||||
language: script
|
|
||||||
pass_filenames: false
|
|
||||||
always_run: true
|
|
||||||
- id: append-only-sessions
|
|
||||||
name: Enforce append-only session logs
|
|
||||||
entry: scripts/check_append_only_sessions.sh
|
|
||||||
language: script
|
|
||||||
pass_filenames: false
|
|
||||||
always_run: true
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# POLLING SCRIPT LINTER - Detect fork-storm anti-patterns in shell scripts
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: no-polling-antipatterns
|
|
||||||
name: Block polling script anti-patterns
|
|
||||||
entry: scripts/check_polling_antipatterns.sh
|
|
||||||
language: script
|
|
||||||
types: [shell]
|
|
||||||
exclude: ^(\.git/|C/|CPP/|phone_focus_mode/lib/tests/|tests/)
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# NOQA BLOCKER - Zero tolerance for noqa/type:ignore suppression comments
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: no-noqa
|
|
||||||
name: Block noqa comments
|
|
||||||
entry: '(?i)#\s*(noqa|type:\s*ignore)'
|
|
||||||
language: pygrep
|
|
||||||
types: [python]
|
|
||||||
- id: no-ruff-noqa
|
|
||||||
name: Block ruff noqa file-level comments
|
|
||||||
entry: '(?i)#\s*ruff:\s*noqa'
|
|
||||||
language: pygrep
|
|
||||||
types: [python]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# RUFF - Fast Python linter and formatter (replaces black, isort, flake8, etc.)
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
||||||
rev: v0.15.2
|
|
||||||
hooks:
|
|
||||||
# Linter - run first to catch issues
|
|
||||||
- id: ruff
|
|
||||||
args:
|
|
||||||
- --fix
|
|
||||||
- --unsafe-fixes
|
|
||||||
- --exit-non-zero-on-fix
|
|
||||||
- --show-fixes
|
|
||||||
types_or: [python, pyi]
|
|
||||||
# Formatter - run after linting
|
|
||||||
- id: ruff-format
|
|
||||||
types_or: [python, pyi]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# MYPY - Static type checking (runs on push only for speed)
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
||||||
rev: v1.13.0
|
|
||||||
hooks:
|
|
||||||
- id: mypy
|
|
||||||
stages: [pre-push]
|
|
||||||
args:
|
|
||||||
- --ignore-missing-imports
|
|
||||||
- --no-error-summary
|
|
||||||
- --disable-error-code=no-untyped-def
|
|
||||||
- --disable-error-code=no-untyped-call
|
|
||||||
- --disable-error-code=var-annotated
|
|
||||||
- --disable-error-code=no-any-unimported
|
|
||||||
- --disable-error-code=type-arg
|
|
||||||
- --disable-error-code=no-any-return
|
|
||||||
- --disable-error-code=misc
|
|
||||||
- --disable-error-code=unused-ignore
|
|
||||||
- --disable-error-code=unreachable
|
|
||||||
- --disable-error-code=assignment
|
|
||||||
- --disable-error-code=no-redef
|
|
||||||
- --disable-error-code=attr-defined
|
|
||||||
- --disable-error-code=arg-type
|
|
||||||
- --disable-error-code=union-attr
|
|
||||||
- --disable-error-code=call-overload
|
|
||||||
- --disable-error-code=return-value
|
|
||||||
- --disable-error-code=redundant-cast
|
|
||||||
- --disable-error-code=empty-body
|
|
||||||
- --disable-error-code=list-item
|
|
||||||
exclude: >-
|
|
||||||
(?x)^(
|
|
||||||
Bash/.*|
|
|
||||||
\.venv/.*|
|
|
||||||
linux_configuration/scripts/misc/testsAndMisc-bash/tools/.*
|
|
||||||
)$
|
|
||||||
additional_dependencies:
|
|
||||||
- types-requests
|
|
||||||
- types-PyYAML
|
|
||||||
- types-python-dateutil
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# PYLINT - Comprehensive Python linter (runs on push only for speed)
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/pylint-dev/pylint
|
|
||||||
rev: v3.3.2
|
|
||||||
hooks:
|
|
||||||
- id: pylint
|
|
||||||
stages: [pre-push]
|
|
||||||
args:
|
|
||||||
- --rcfile=pyproject.toml
|
|
||||||
- --fail-under=8.0
|
|
||||||
- --jobs=0
|
|
||||||
additional_dependencies:
|
|
||||||
- pytest
|
|
||||||
- python-chess
|
|
||||||
- requests
|
|
||||||
- pygame
|
|
||||||
exclude: ^(Bash/|\.venv/)
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# BANDIT - Security linter (runs on push only for speed)
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/PyCQA/bandit
|
|
||||||
rev: 1.7.10
|
|
||||||
hooks:
|
|
||||||
- id: bandit
|
|
||||||
stages: [pre-push]
|
|
||||||
args:
|
|
||||||
- -c
|
|
||||||
- pyproject.toml
|
|
||||||
- --severity-level=high
|
|
||||||
- --confidence-level=medium
|
|
||||||
- --skip=B113
|
|
||||||
additional_dependencies: ["bandit[toml]"]
|
|
||||||
exclude: ^(Bash/|\.venv/|tests/|.*test.*\.py$)
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# PYTEST + COVERAGE - Run tests and enforce 100% code coverage
|
|
||||||
# Only tests for subpackages with changed files are run (see script).
|
|
||||||
# Runs on push only (slow); use --hook-stage pre-push to run manually.
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: pytest-coverage
|
|
||||||
name: pytest with coverage enforcement
|
|
||||||
entry: python scripts/pytest_changed_packages.py
|
|
||||||
language: system
|
|
||||||
types: [python]
|
|
||||||
pass_filenames: true
|
|
||||||
stages: [pre-push]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# VULTURE - Dead code detection (disabled - doesn't work well with pre-commit)
|
|
||||||
# ===========================================================================
|
|
||||||
# - repo: https://github.com/jendrikseipp/vulture
|
|
||||||
# rev: v2.13
|
|
||||||
# hooks:
|
|
||||||
# - id: vulture
|
|
||||||
# args:
|
|
||||||
# - --min-confidence=80
|
|
||||||
# - --exclude=.venv,Bash,__pycache__
|
|
||||||
# exclude: ^(Bash/|\.venv/)
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# PYUPGRADE - Upgrade Python syntax (disabled - incompatible with Python 3.14)
|
|
||||||
# ===========================================================================
|
|
||||||
# - repo: https://github.com/asottile/pyupgrade
|
|
||||||
# rev: v3.19.0
|
|
||||||
# hooks:
|
|
||||||
# - id: pyupgrade
|
|
||||||
# args:
|
|
||||||
# - --py310-plus
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# CODESPELL - Spell checking in code (expanded ignore list for non-English)
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/codespell-project/codespell
|
|
||||||
rev: v2.3.0
|
|
||||||
hooks:
|
|
||||||
- id: codespell
|
|
||||||
args:
|
|
||||||
- --skip=*.json,*.lock,*.min.js,*.min.css,.git,__pycache__,.venv,*.txt
|
|
||||||
- --ignore-words-list=als,ans,ect,nd,som,sur,te,nam,numer,lew,sie,wil,postion,clen,ther,folow,derrive,ony,tje,noe,theses,crate,doubleclick,wile,tabel,pary,blok,bloc,proces,serwer,parametr,adres,hart,dout,metod,tekst,synonim,grup,mosty,lokal,skalar,milion,nowe,tre,hel,alph
|
|
||||||
exclude: ^(Bash/ffmpeg-build/|LaTeX/|CPP/|.*\.geojson$)
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# DOCFORMATTER - Format docstrings (disabled - causes recursion errors)
|
|
||||||
# ===========================================================================
|
|
||||||
# - repo: local
|
|
||||||
# hooks:
|
|
||||||
# - id: docformatter
|
|
||||||
# name: docformatter
|
|
||||||
# entry: docformatter
|
|
||||||
# language: system
|
|
||||||
# types: [python]
|
|
||||||
# args:
|
|
||||||
# - --in-place
|
|
||||||
# - --wrap-summaries=88
|
|
||||||
# - --wrap-descriptions=88
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# INTERROGATE - Docstring coverage (disabled - causes recursion on large files)
|
|
||||||
# ===========================================================================
|
|
||||||
# - repo: https://github.com/econchick/interrogate
|
|
||||||
# rev: 1.7.0
|
|
||||||
# hooks:
|
|
||||||
# - id: interrogate
|
|
||||||
# args:
|
|
||||||
# - --fail-under=0
|
|
||||||
# - --verbose
|
|
||||||
# - --ignore-init-method
|
|
||||||
# - --ignore-init-module
|
|
||||||
# - --ignore-magic
|
|
||||||
# - --ignore-private
|
|
||||||
# - --ignore-semiprivate
|
|
||||||
# - --exclude=Bash,.venv,__pycache__
|
|
||||||
# pass_filenames: false
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# AUTOFLAKE - Remove unused imports/variables
|
|
||||||
# Disabled: fully redundant with ruff (F401, F841, F811) + --fix
|
|
||||||
# ===========================================================================
|
|
||||||
# - repo: https://github.com/PyCQA/autoflake
|
|
||||||
# rev: v2.3.1
|
|
||||||
# hooks:
|
|
||||||
# - id: autoflake
|
|
||||||
# args:
|
|
||||||
# - --in-place
|
|
||||||
# - --remove-all-unused-imports
|
|
||||||
# - --remove-unused-variables
|
|
||||||
# - --remove-duplicate-keys
|
|
||||||
# - --expand-star-imports
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# SAFETY - Check for security vulnerabilities in dependencies
|
|
||||||
# ===========================================================================
|
|
||||||
# Note: Safety requires API key for full functionality, disabled by default
|
|
||||||
# - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
|
|
||||||
# rev: v1.3.2
|
|
||||||
# hooks:
|
|
||||||
# - id: python-safety-dependencies-check
|
|
||||||
# files: requirements.*\.txt$
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# PYRIGHT - Microsoft's type checker (very strict, optional)
|
|
||||||
# ===========================================================================
|
|
||||||
# Uncomment to enable - can be slow and very strict
|
|
||||||
# - repo: https://github.com/RobertCraigie/pyright-python
|
|
||||||
# rev: v1.1.350
|
|
||||||
# hooks:
|
|
||||||
# - id: pyright
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# PRETTIER - JSON/YAML/Markdown formatting (runs on push only — slow Node.js startup)
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
|
||||||
rev: v4.0.0-alpha.8
|
|
||||||
hooks:
|
|
||||||
- id: prettier
|
|
||||||
types_or: [yaml, json, markdown]
|
|
||||||
exclude: ^(Bash/|\.venv/|.*\.lock$|C/compile_commands\.json)
|
|
||||||
stages: [pre-push]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# SHELLCHECK - Shell script linting
|
|
||||||
# Wrapper batches files to avoid OOM on large repos.
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: shellcheck
|
|
||||||
name: shellcheck
|
|
||||||
entry: bash -c 'printf "%s\0" "$@" | xargs -0 -n 40 shellcheck --severity=warning' --
|
|
||||||
language: system
|
|
||||||
types: [shell]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# CLANG-FORMAT - C/C++ code formatting
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
|
||||||
rev: v19.1.6
|
|
||||||
hooks:
|
|
||||||
- id: clang-format
|
|
||||||
types_or: [c, c++]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# CPPCHECK - C/C++ static analysis
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: cppcheck
|
|
||||||
name: cppcheck
|
|
||||||
entry: cppcheck
|
|
||||||
language: system
|
|
||||||
types_or: [c, c++]
|
|
||||||
args:
|
|
||||||
- --enable=warning,portability
|
|
||||||
- --force
|
|
||||||
- --quiet
|
|
||||||
- --error-exitcode=1
|
|
||||||
- --inline-suppr
|
|
||||||
- --suppress=missingIncludeSystem
|
|
||||||
- --suppress=syntaxError
|
|
||||||
- --suppress=nullPointerOutOfResources
|
|
||||||
- --suppress=ctunullpointerOutOfResources
|
|
||||||
- --suppress=ctunullpointerOutOfMemory
|
|
||||||
- --std=c11
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# FLAWFINDER - C/C++ security scanner
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: flawfinder
|
|
||||||
name: flawfinder
|
|
||||||
entry: flawfinder
|
|
||||||
language: system
|
|
||||||
types_or: [c, c++]
|
|
||||||
args:
|
|
||||||
- --error-level=5
|
|
||||||
- --quiet
|
|
||||||
- --columns
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# CHECK C/C++ BUILD FILES - Ensure every C/C++ dir has Makefile and run.sh
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: check-c-cpp-build-files
|
|
||||||
name: check C/C++ dirs have Makefile and run.sh
|
|
||||||
entry: scripts/check_c_cpp_build_files.sh
|
|
||||||
language: script
|
|
||||||
types_or: [c, c++]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# CHECK PYTHON LOCATION - All Python files must be under python_pkg/
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: check-python-location
|
|
||||||
name: check Python files are under python_pkg/
|
|
||||||
entry: scripts/check_python_location.sh
|
|
||||||
language: script
|
|
||||||
types: [python]
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# REMOVE EMPTY DIRECTORIES - Clean up empty folders in the repo
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: remove-empty-dirs
|
|
||||||
name: remove empty directories
|
|
||||||
entry: find . -type d -empty -not -path './.git/*' -delete -print
|
|
||||||
language: system
|
|
||||||
pass_filenames: false
|
|
||||||
always_run: true
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# SECRET PATTERNS - Block commits containing sensitive data
|
|
||||||
# ===========================================================================
|
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: check-no-secrets
|
|
||||||
name: check for leaked secrets
|
|
||||||
entry: scripts/check_no_secrets.sh
|
|
||||||
language: script
|
|
||||||
exclude: ^(\.secret-patterns|\.pre-commit-config\.yaml|.*\.geojson)$
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# COMMITIZEN - Conventional commits (optional)
|
|
||||||
# ===========================================================================
|
|
||||||
# - repo: https://github.com/commitizen-tools/commitizen
|
|
||||||
# rev: v3.13.0
|
|
||||||
# hooks:
|
|
||||||
# - id: commitizen
|
|
||||||
# - id: commitizen-branch
|
|
||||||
# stages: [pre-push]
|
|
||||||
1
.pre-commit-config.yaml
Symbolic link
1
.pre-commit-config.yaml
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
meta/.pre-commit-config.yaml
|
||||||
@ -1,10 +0,0 @@
|
|||||||
BasedOnStyle: LLVM
|
|
||||||
IndentWidth: 4
|
|
||||||
TabWidth: 4
|
|
||||||
UseTab: Never
|
|
||||||
ColumnLimit: 100
|
|
||||||
SortIncludes: true
|
|
||||||
AlignConsecutiveAssignments: true
|
|
||||||
AlignConsecutiveDeclarations: true
|
|
||||||
AllowShortIfStatementsOnASingleLine: false
|
|
||||||
BreakBeforeBraces: Allman
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
Checks: >
|
|
||||||
clang-analyzer-*,
|
|
||||||
-clang-analyzer-security.*,
|
|
||||||
bugprone-*,
|
|
||||||
cert-err33-c,
|
|
||||||
cert-err34-c,
|
|
||||||
cert-fio38-c,
|
|
||||||
performance-*,
|
|
||||||
portability-*,
|
|
||||||
misc-unused-parameters
|
|
||||||
WarningsAsErrors: >
|
|
||||||
clang-analyzer-*,
|
|
||||||
bugprone-*,
|
|
||||||
cert-err33-c,
|
|
||||||
cert-err34-c,
|
|
||||||
cert-fio38-c
|
|
||||||
HeaderFilterRegex: ".*"
|
|
||||||
FormatStyle: none
|
|
||||||
1
C/.gitignore
vendored
1
C/.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
random_engine
|
|
||||||
@ -1,33 +0,0 @@
|
|||||||
# Build, test and coverage rules for the atop_agg aggregator.

# Remove a half-written target when its recipe fails so it never looks
# up to date on the next run.
.DELETE_ON_ERROR:

CC := gcc
# Flags every compile needs. Kept out of CFLAGS so that a command-line
# override such as `make CFLAGS=-g` cannot silently drop them.
REQ_FLAGS := -std=c11 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Wno-unused-parameter
# User-tunable optimisation flags for the release binary.
CFLAGS := -O2
# Flags for the instrumented test binary; ATOP_AGG_NO_MAIN is defined so
# atop_agg.c can be compiled together with the test driver.
COV := -O0 -g --coverage $(REQ_FLAGS) -DATOP_AGG_NO_MAIN

SRC := atop_agg.c
HDR := atop_agg.h
BIN := atop_agg
TEST_SRC := test_atop_agg.c
TEST_BIN := test_atop_agg

.PHONY: all clean rebuild test coverage

all: $(BIN)

$(BIN): $(SRC) $(HDR)
	$(CC) $(CFLAGS) $(REQ_FLAGS) -o $@ $(SRC)

$(TEST_BIN): $(TEST_SRC) $(SRC) $(HDR)
	$(CC) $(COV) -o $@ $(TEST_SRC) $(SRC)

test: $(TEST_BIN)
	./$(TEST_BIN)

# Depends on `test` so the .gcda counters exist before lcov captures them.
coverage: test
	lcov --capture --directory . --output-file coverage.info --no-external
	lcov --remove coverage.info '*/$(TEST_SRC)' --output-file coverage.info
	genhtml coverage.info --output-directory coverage_html
	@echo "Coverage report at coverage_html/index.html"

clean:
	$(RM) $(BIN) $(TEST_BIN) *.o *.gcda *.gcno coverage.info
	$(RM) -r coverage_html

rebuild: clean all
|
|
||||||
@ -1,474 +0,0 @@
|
|||||||
/*
|
|
||||||
* atop_agg — fast per-PID aggregator for `atop -P PRC,PRM` output.
|
|
||||||
*
|
|
||||||
* Reads atop parseable output on stdin, folds it into per-PID CPU-tick
|
|
||||||
* and RSS trackers, and prints a compact TSV summary on stdout that a
|
|
||||||
* higher-level driver (Python) then name-folds into human-readable
|
|
||||||
* tables. This avoids the ~3s Python parse cost on a typical day's
|
|
||||||
* 1.7M-line atop dump; the C hot loop completes in well under a second
|
|
||||||
* so the pipeline runs at atop's own ~2s wall-clock floor.
|
|
||||||
*
|
|
||||||
* Output TSV lines:
|
|
||||||
* W<TAB>start_epoch<TAB>end_epoch<TAB>distinct_samples<TAB>median_interval
|
|
||||||
* C<TAB>pid<TAB>name<TAB>delta_ticks
|
|
||||||
* R<TAB>pid<TAB>name<TAB>peak_kb<TAB>sum_kb<TAB>samples
|
|
||||||
*/
|
|
||||||
#include "atop_agg.h"
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* A real-world day of atop on a dev box can see >700k distinct PIDs
|
|
||||||
* because every short-lived compiler/shell subprocess gets a fresh ID.
|
|
||||||
* 2M slots keeps the load factor below ~40% for that workload, keeping
|
|
||||||
* linear-probe chains short without dynamic resizing.
|
|
||||||
*/
|
|
||||||
#define HASH_CAP_BITS 21
|
|
||||||
#define HASH_CAP (1u << HASH_CAP_BITS)
|
|
||||||
#define HASH_MASK (HASH_CAP - 1u)
|
|
||||||
#define MAX_EPOCHS 4096
|
|
||||||
#define MAX_TOKENS 64
|
|
||||||
|
|
||||||
/* Knuth multiplicative hash → index in an open-addressed table. */
|
|
||||||
static unsigned int hash_pid(int pid)
|
|
||||||
{
|
|
||||||
unsigned int k = (unsigned int)pid;
|
|
||||||
return (k * 2654435761u) >> (32 - HASH_CAP_BITS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Look up the CPU tracker for *pid* in s->cpu, claiming and initialising
 * an empty slot (pid == 0) the first time the PID is seen.
 *
 * Collisions are resolved by linear probing from the hash_pid() bucket.
 * Returns NULL when all HASH_CAP slots were probed without finding the
 * PID or an empty slot, so the caller can drop the sample.
 */
static PidCpu *cpu_slot(State *s, int pid)
{
    const unsigned int home = hash_pid(pid);
    unsigned int step;

    for (step = 0; step != HASH_CAP; ++step)
    {
        PidCpu *entry = &s->cpu[(home + step) & HASH_MASK];

        if (entry->pid == pid)
        {
            return entry;
        }
        if (entry->pid != 0)
        {
            continue; /* occupied by another PID — keep probing */
        }

        /* Empty slot: claim it. first_ticks == -1 marks "no sample yet". */
        entry->name[0] = '\0';
        entry->samples = 0;
        entry->last_ticks = 0;
        entry->first_ticks = -1;
        entry->pid = pid;
        return entry;
    }

    /* Table full — drop the sample rather than loop forever. */
    return NULL;
}
|
|
||||||
|
|
||||||
/*
 * Look up the RSS tracker for *pid* in s->ram, claiming and zeroing an
 * empty slot (pid == 0) the first time the PID is seen.
 *
 * Uses the same linear-probe scheme as the CPU table. Returns NULL when
 * every one of the HASH_CAP slots is taken by other PIDs.
 */
static PidRam *ram_slot(State *s, int pid)
{
    const unsigned int home = hash_pid(pid);
    unsigned int step;

    for (step = 0; step != HASH_CAP; ++step)
    {
        PidRam *entry = &s->ram[(home + step) & HASH_MASK];

        if (entry->pid == pid)
        {
            return entry;
        }
        if (entry->pid != 0)
        {
            continue; /* occupied by another PID — keep probing */
        }

        /* Empty slot: claim it with zeroed accumulators. */
        entry->name[0] = '\0';
        entry->samples = 0;
        entry->sum_kb = 0;
        entry->peak_kb = 0;
        entry->pid = pid;
        return entry;
    }

    return NULL;
}
|
|
||||||
|
|
||||||
/*
 * Record *epoch* in s->epochs unless it is already present.
 *
 * The list is searched linearly and holds at most MAX_EPOCHS entries;
 * once it is full, further new epochs are silently ignored.
 */
static void add_epoch(State *s, long epoch)
{
    int idx = 0;

    while (idx < s->n_epochs && s->epochs[idx] != epoch)
    {
        idx++;
    }
    if (idx < s->n_epochs)
    {
        return; /* already recorded */
    }
    if (s->n_epochs >= MAX_EPOCHS)
    {
        return; /* no room left */
    }
    s->epochs[s->n_epochs] = epoch;
    s->n_epochs += 1;
}
|
|
||||||
|
|
||||||
/*
 * Tokenise a whitespace-separated line in place.
 *
 * Fills *tokens* with pointers into *line* (which is modified: the
 * character after each token is overwritten with NUL) and returns the
 * number of tokens stored, never more than *max_tokens*. Scanning stops
 * at the end of the string or at a newline that starts where a token
 * would begin.
 *
 * A token that starts with '(' is treated as a parenthesised process
 * name: interior blanks are preserved (atop emits `(Web Content)`), and
 * the token ends at the first ')' that is followed by a blank, tab,
 * newline or the end of the string. Requiring a separator after the
 * closing parenthesis keeps names that themselves contain parentheses,
 * such as `((sd-pam))`, intact instead of cutting them at the inner ')'.
 * If no such ')' exists the token runs to the end of the string.
 */
int tokenize_line(char *line, char **tokens, int max_tokens)
{
    int n = 0;
    char *p = line;

    while (*p && n < max_tokens)
    {
        /* Skip the blanks separating tokens. */
        while (*p == ' ' || *p == '\t')
        {
            p++;
        }
        if (!*p || *p == '\n')
        {
            break;
        }

        char *start = p;
        if (*p == '(')
        {
            /* Consume through the closing ')', preserving interior spaces. */
            while (*p)
            {
                const char next = p[1];
                const int closes = (*p == ')') &&
                                   (next == ' ' || next == '\t' || next == '\n' || next == '\0');
                p++;
                if (closes)
                {
                    break;
                }
            }
        }
        else
        {
            while (*p && *p != ' ' && *p != '\t' && *p != '\n')
            {
                p++;
            }
        }

        /* Terminate the token unless it already ends at the string's NUL. */
        if (*p)
        {
            *p = '\0';
            p++;
        }
        tokens[n++] = start;
    }
    return n;
}
|
|
||||||
|
|
||||||
/*
 * Copy *src* into *dst* (capacity *cap* bytes, including the NUL),
 * stripping one leading '(' and one trailing ')' when both are present.
 *
 * The result is truncated to fit and always NUL-terminated. If the
 * resulting name is empty (or *src* is NULL), "unknown" is written
 * instead. When *dst* is NULL or *cap* is 0 there is no room even for
 * the terminator, so nothing is written.
 */
void copy_name(char *dst, size_t cap, const char *src)
{
    /* Guard first: with cap == 0, `cap - 1` below would wrap to SIZE_MAX. */
    if (dst == NULL || cap == 0)
    {
        return;
    }

    const char *from = (src != NULL) ? src : "";
    size_t len = strlen(from);

    if (len >= 2 && from[0] == '(' && from[len - 1] == ')')
    {
        from += 1;
        len -= 2;
    }
    if (len == 0)
    {
        from = "unknown";
        len = strlen(from);
    }
    if (len >= cap)
    {
        len = cap - 1; /* leave room for the terminator */
    }
    memcpy(dst, from, len);
    dst[len] = '\0';
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Parse one PRC/PRM line and update *s*. Unknown labels and malformed
|
|
||||||
* records are silently skipped (atop emits a stable schema, but guard
|
|
||||||
* against future changes and header/separator lines).
|
|
||||||
*/
|
|
||||||
void process_line(char *line, State *s)
|
|
||||||
{
|
|
||||||
char *tokens[MAX_TOKENS];
|
|
||||||
int n = tokenize_line(line, tokens, MAX_TOKENS);
|
|
||||||
if (n < 11)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const char *label = tokens[0];
|
|
||||||
int is_prc = (label[0] == 'P' && label[1] == 'R' && label[2] == 'C' && label[3] == '\0');
|
|
||||||
int is_prm = (label[0] == 'P' && label[1] == 'R' && label[2] == 'M' && label[3] == '\0');
|
|
||||||
if (!is_prc && !is_prm)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
long epoch = strtol(tokens[2], NULL, 10);
|
|
||||||
int pid = (int)strtol(tokens[6], NULL, 10);
|
|
||||||
if (pid <= 0)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const char *name_tok = tokens[7];
|
|
||||||
if (is_prc)
|
|
||||||
{
|
|
||||||
long utime = strtol(tokens[9], NULL, 10);
|
|
||||||
long stime = strtol(tokens[10], NULL, 10);
|
|
||||||
long ticks = utime + stime;
|
|
||||||
add_epoch(s, epoch);
|
|
||||||
PidCpu *slot = cpu_slot(s, pid);
|
|
||||||
if (slot == NULL)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (slot->first_ticks < 0)
|
|
||||||
{
|
|
||||||
slot->first_ticks = ticks;
|
|
||||||
}
|
|
||||||
slot->last_ticks = ticks;
|
|
||||||
slot->samples++;
|
|
||||||
copy_name(slot->name, sizeof(slot->name), name_tok);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* PRM */
|
|
||||||
if (n < 12)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
long rsize_kb = strtol(tokens[11], NULL, 10);
|
|
||||||
PidRam *slot = ram_slot(s, pid);
|
|
||||||
if (slot == NULL)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (rsize_kb > slot->peak_kb)
|
|
||||||
{
|
|
||||||
slot->peak_kb = rsize_kb;
|
|
||||||
}
|
|
||||||
slot->sum_kb += rsize_kb;
|
|
||||||
slot->samples++;
|
|
||||||
copy_name(slot->name, sizeof(slot->name), name_tok);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* qsort() comparator: orders two long values ascending (-1, 0 or 1). */
static int cmp_long(const void *a, const void *b)
{
    const long lhs = *(const long *)a;
    const long rhs = *(const long *)b;
    /* Comparison results are 0/1, so the difference is a safe three-way sign. */
    return (lhs > rhs) - (lhs < rhs);
}
|
|
||||||
|
|
||||||
/*
 * FNV-1a hash of a NUL-terminated string (offset basis 2166136261, prime
 * 16777619); used to key the name table.
 */
static unsigned int fnv1a(const char *s)
{
    unsigned int hash = 2166136261u;
    for (const unsigned char *p = (const unsigned char *)s; *p != 0; p++)
    {
        /* XOR the byte in first, then multiply by the prime. */
        hash = (hash ^ *p) * 16777619u;
    }
    return hash;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Per-name aggregate, built in a second pass over cpu/ram tables so that
|
|
||||||
* the caller only has to parse a few thousand output rows instead of one
|
|
||||||
* row per PID. The name table is deliberately oversized (64k slots for an
|
|
||||||
* expected few-thousand names) to keep linear-probe chains short.
|
|
||||||
*/
|
|
||||||
#define NAME_CAP_BITS 16
|
|
||||||
#define NAME_CAP (1u << NAME_CAP_BITS)
|
|
||||||
#define NAME_MASK (NAME_CAP - 1u)
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
char name[ATOP_AGG_NAME_MAX];
|
|
||||||
long cpu_ticks;
|
|
||||||
int cpu_pids;
|
|
||||||
long peak_kb;
|
|
||||||
long sum_avg_kb;
|
|
||||||
int rss_samples;
|
|
||||||
int ram_pids;
|
|
||||||
char used;
|
|
||||||
} NameAgg;
|
|
||||||
|
|
||||||
/*
 * Find the slot for *name* in the open-addressed *table*, claiming a free
 * slot on first sight. Probing is linear, starting at the FNV-1a hash of
 * the name. Returns NULL when every slot is taken by a different name.
 */
static NameAgg *name_slot(NameAgg *table, const char *name)
{
    unsigned int index = fnv1a(name);
    unsigned int remaining = NAME_CAP;
    while (remaining-- > 0)
    {
        NameAgg *entry = &table[index++ & NAME_MASK];
        if (entry->used)
        {
            if (strcmp(entry->name, name) == 0)
            {
                return entry;
            }
            continue;
        }
        /* Free slot: claim it and store a (possibly truncated) copy of the key.
           The source was already NUL-terminated by copy_name. */
        entry->used = 1;
        size_t len = strlen(name);
        if (len > sizeof(entry->name) - 1)
        {
            len = sizeof(entry->name) - 1;
        }
        memcpy(entry->name, name, len);
        entry->name[len] = '\0';
        return entry;
    }
    return NULL;
}
|
|
||||||
|
|
||||||
/* Write the aggregated summary to *out* in the documented TSV schema. */
|
|
||||||
void emit_results(State *s, FILE *out)
|
|
||||||
{
|
|
||||||
long start_epoch = 0;
|
|
||||||
long end_epoch = 0;
|
|
||||||
long median_interval = 0;
|
|
||||||
if (s->n_epochs > 0)
|
|
||||||
{
|
|
||||||
qsort(s->epochs, (size_t)s->n_epochs, sizeof(long), cmp_long);
|
|
||||||
start_epoch = s->epochs[0];
|
|
||||||
end_epoch = s->epochs[s->n_epochs - 1];
|
|
||||||
if (s->n_epochs >= 2)
|
|
||||||
{
|
|
||||||
long deltas[MAX_EPOCHS];
|
|
||||||
for (int i = 0; i < s->n_epochs - 1; i++)
|
|
||||||
{
|
|
||||||
deltas[i] = s->epochs[i + 1] - s->epochs[i];
|
|
||||||
}
|
|
||||||
qsort(deltas, (size_t)(s->n_epochs - 1), sizeof(long), cmp_long);
|
|
||||||
median_interval = deltas[(s->n_epochs - 1) / 2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(out, "W\t%ld\t%ld\t%d\t%ld\n", start_epoch, end_epoch, s->n_epochs, median_interval);
|
|
||||||
|
|
||||||
NameAgg *names = calloc(NAME_CAP, sizeof(NameAgg));
|
|
||||||
if (!names)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (unsigned int i = 0; i < HASH_CAP; i++)
|
|
||||||
{
|
|
||||||
PidCpu *slot = &s->cpu[i];
|
|
||||||
if (slot->pid == 0)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
long delta = slot->last_ticks;
|
|
||||||
if (slot->samples >= 2)
|
|
||||||
{
|
|
||||||
delta = slot->last_ticks - slot->first_ticks;
|
|
||||||
if (delta < 0)
|
|
||||||
{
|
|
||||||
delta = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NameAgg *na = name_slot(names, slot->name);
|
|
||||||
if (!na)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
na->cpu_ticks += delta;
|
|
||||||
na->cpu_pids++;
|
|
||||||
}
|
|
||||||
for (unsigned int i = 0; i < HASH_CAP; i++)
|
|
||||||
{
|
|
||||||
PidRam *slot = &s->ram[i];
|
|
||||||
if (slot->pid == 0)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
long avg_kb = slot->samples ? slot->sum_kb / slot->samples : 0;
|
|
||||||
NameAgg *na = name_slot(names, slot->name);
|
|
||||||
if (!na)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (slot->peak_kb > na->peak_kb)
|
|
||||||
{
|
|
||||||
na->peak_kb = slot->peak_kb;
|
|
||||||
}
|
|
||||||
na->sum_avg_kb += avg_kb;
|
|
||||||
na->rss_samples++;
|
|
||||||
na->ram_pids++;
|
|
||||||
}
|
|
||||||
for (unsigned int i = 0; i < NAME_CAP; i++)
|
|
||||||
{
|
|
||||||
NameAgg *na = &names[i];
|
|
||||||
if (!na->used)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int pids = na->cpu_pids > na->ram_pids ? na->cpu_pids : na->ram_pids;
|
|
||||||
fprintf(out, "N\t%s\t%ld\t%ld\t%ld\t%d\t%d\n", na->name, na->cpu_ticks, na->peak_kb,
|
|
||||||
na->sum_avg_kb, na->rss_samples, pids);
|
|
||||||
}
|
|
||||||
free(names);
|
|
||||||
}
|
|
||||||
|
|
||||||
State *state_new(void)
|
|
||||||
{
|
|
||||||
State *s = calloc(1, sizeof(State));
|
|
||||||
if (!s)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
s->cpu = calloc(HASH_CAP, sizeof(PidCpu));
|
|
||||||
s->ram = calloc(HASH_CAP, sizeof(PidRam));
|
|
||||||
s->epochs = calloc(MAX_EPOCHS, sizeof(long));
|
|
||||||
if (!s->cpu || !s->ram || !s->epochs)
|
|
||||||
{
|
|
||||||
state_free(s);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
s->n_epochs = 0;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Release a State and the three arrays it owns; a NULL argument is a no-op. */
void state_free(State *s)
{
    if (s != NULL)
    {
        free(s->epochs);
        free(s->ram);
        free(s->cpu);
        free(s);
    }
}
|
|
||||||
|
|
||||||
#ifndef ATOP_AGG_NO_MAIN
|
|
||||||
int main(void)
|
|
||||||
{
|
|
||||||
State *s = state_new();
|
|
||||||
if (!s)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "atop_agg: out of memory\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
char *line = NULL;
|
|
||||||
size_t cap = 0;
|
|
||||||
ssize_t got;
|
|
||||||
while ((got = getline(&line, &cap, stdin)) != -1)
|
|
||||||
{
|
|
||||||
process_line(line, s);
|
|
||||||
}
|
|
||||||
free(line);
|
|
||||||
emit_results(s, stdout);
|
|
||||||
state_free(s);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@ -1,42 +0,0 @@
|
|||||||
#ifndef ATOP_AGG_H
#define ATOP_AGG_H

#include <stdio.h>

/* NAME_MAX capped to keep slot size compact; typical atop comm is 15 chars. */
#define ATOP_AGG_NAME_MAX 40

/* Per-PID CPU accounting slot. */
typedef struct PidCpu
{
    int pid;                      /* owning PID; 0 marks an empty slot */
    char name[ATOP_AGG_NAME_MAX]; /* process name from the latest sample */
    long first_ticks;             /* utime+stime of the first sample; negative means "not set yet" */
    long last_ticks;              /* utime+stime of the most recent sample */
    int samples;                  /* number of PRC records seen for this PID */
} PidCpu;

/* Per-PID resident-memory accounting slot. */
typedef struct PidRam
{
    int pid;                      /* owning PID; 0 marks an empty slot */
    char name[ATOP_AGG_NAME_MAX]; /* process name from the latest sample */
    long peak_kb;                 /* largest resident size seen, in kB */
    long sum_kb;                  /* sum of all resident-size samples, in kB */
    int samples;                  /* number of PRM records seen for this PID */
} PidRam;

/* Whole-run aggregation state; create with state_new, destroy with state_free. */
typedef struct State
{
    PidCpu *cpu;  /* CPU slot table */
    PidRam *ram;  /* RAM slot table */
    long *epochs; /* sample timestamps collected from PRC records */
    int n_epochs; /* number of valid entries in epochs */
} State;

/* Allocate an empty State; returns NULL on allocation failure. */
State *state_new(void);

/* Free a State and everything it owns; NULL is accepted. */
void state_free(State *s);

/* Split *line* in place into at most max_tokens tokens; returns the token count. */
int tokenize_line(char *line, char **tokens, int max_tokens);

/* Copy a process-name token from src into dst, which holds cap bytes. */
void copy_name(char *dst, size_t cap, const char *src);

/* Parse one line of atop output and fold PRC/PRM records into *s*. */
void process_line(char *line, State *s);

/* Write the aggregated summary for *s* to *out*. */
void emit_results(State *s, FILE *out);

#endif
|
|
||||||
@ -1,12 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# Build and demo atop_agg on an atop log (default: today's log).
#
# Usage: pass the log path as the optional first argument. A relative path
# is interpreted relative to the caller's working directory.
set -euo pipefail

# Locate the script directory without changing the caller's cwd, so that a
# relative log path given as $1 still resolves where the user expects.
script_dir="$(cd "$(dirname "$0")" && pwd)"

# Build in a subshell so the directory change does not leak out.
(cd "$script_dir" && make)

LOG="${1:-/var/log/atop/atop_$(date +%Y%m%d)}"
if [[ ! -f "$LOG" ]]; then
  echo "No atop log at $LOG; pass a path as arg 1." >&2
  exit 1
fi
echo "Aggregating $LOG ..." >&2

# Show only the first 20 rows. awk keeps reading to EOF, so atop_agg never
# gets SIGPIPE; with `head` and `set -o pipefail` the script would exit 141
# whenever there are more rows than head consumes.
atop -r "$LOG" -P PRC,PRM | "$script_dir/atop_agg" | awk 'NR <= 20'
|
|
||||||
@ -1,226 +0,0 @@
|
|||||||
/*
|
|
||||||
* Unit tests for atop_agg helpers. Compiled with --coverage; aims for
|
|
||||||
* 100% line coverage of atop_agg.c (excluding main, which is guarded
|
|
||||||
* by -DATOP_AGG_NO_MAIN).
|
|
||||||
*/
|
|
||||||
#include "atop_agg.h"
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
/* Number of CHECK conditions that evaluated false so far. */
static int failures = 0;

/* Report one failed check on stderr and count it. */
static void check_failed(const char *file, int line, const char *expr)
{
    fprintf(stderr, "FAIL %s:%d: %s\n", file, line, expr);
    failures++;
}

/* Non-fatal assertion: a false condition is logged and counted; execution continues. */
#define CHECK(cond) ((cond) ? (void)0 : check_failed(__FILE__, __LINE__, #cond))
|
|
||||||
|
|
||||||
/* copy_name: parenthesis stripping, the "unknown" fallback, and truncation. */
static void test_copy_name(void)
{
    char buf[16];
    char tiny[4];

    /* Parenthesised and bare names yield the same result. */
    copy_name(buf, sizeof(buf), "(bash)");
    CHECK(strcmp(buf, "bash") == 0);
    copy_name(buf, sizeof(buf), "bash");
    CHECK(strcmp(buf, "bash") == 0);

    /* Empty names, with or without parentheses, fall back to "unknown". */
    copy_name(buf, sizeof(buf), "()");
    CHECK(strcmp(buf, "unknown") == 0);
    copy_name(buf, sizeof(buf), "");
    CHECK(strcmp(buf, "unknown") == 0);

    /* An over-long name is cut to fill the buffer exactly. */
    copy_name(buf, sizeof(buf), "(veryverylongnameabc)");
    CHECK(strlen(buf) == sizeof(buf) - 1);

    /* The fallback itself is truncated when the buffer cannot hold "unknown". */
    copy_name(tiny, sizeof(tiny), "");
    CHECK(strcmp(tiny, "unk") == 0);
}
|
|
||||||
|
|
||||||
/* tokenize_line: plain records, parenthesised names, blank lines, caps. */
static void test_tokenize(void)
{
    /* Plain PRC record: eleven fields. */
    {
        char line[] = "PRC host 1000 2026/01/01 12:00:00 600 123 (bash) S 10 20\n";
        char *toks[32];
        int n = tokenize_line(line, toks, 32);
        CHECK(n == 11);
        CHECK(strcmp(toks[0], "PRC") == 0);
        CHECK(strcmp(toks[7], "(bash)") == 0);
        CHECK(strcmp(toks[10], "20") == 0);
    }

    /* A parenthesised name containing a space stays one token. */
    {
        char line2[] = "PRM host 1000 d t 600 200 (Web Content) S 4096 1 2 0 0\n";
        char *t2[32];
        int n2 = tokenize_line(line2, t2, 32);
        CHECK(n2 >= 12);
        CHECK(strncmp(t2[7], "(Web Content)", 13) == 0);
    }

    /* A whitespace-only line produces no tokens. */
    {
        char empty[] = " \n";
        char *t3[4];
        CHECK(tokenize_line(empty, t3, 4) == 0);
    }

    /* The max_tokens cap is respected. */
    {
        char big[] = "a b c d e f g h i j k";
        char *t4[3];
        CHECK(tokenize_line(big, t4, 3) == 3);
    }

    /* An unclosed parenthesis swallows the rest of the line. */
    {
        char unclosed[] = "(abc";
        char *t5[2];
        int n5 = tokenize_line(unclosed, t5, 2);
        CHECK(n5 == 1);
        CHECK(strcmp(t5[0], "(abc") == 0);
    }
}
|
|
||||||
|
|
||||||
static void test_process_and_emit(void)
|
|
||||||
{
|
|
||||||
State *s = state_new();
|
|
||||||
assert(s != NULL);
|
|
||||||
|
|
||||||
/* Two PRC samples for PID 100: first utime+stime=30, last=100.
|
|
||||||
Delta should be 70. */
|
|
||||||
char prc1[] = "PRC h 1000 d t 600 100 (cc1) S 10 20\n";
|
|
||||||
char prc2[] = "PRC h 1600 d t 600 100 (cc1) S 70 30\n";
|
|
||||||
process_line(prc1, s);
|
|
||||||
process_line(prc2, s);
|
|
||||||
|
|
||||||
/* One PRM sample for PID 100: rss=4096 kB. */
|
|
||||||
char prm1[] = "PRM h 1000 d t 600 100 (cc1) S 4096 100 4096 0 0\n";
|
|
||||||
process_line(prm1, s);
|
|
||||||
|
|
||||||
/* PRC sample for PID 200 seen only once → delta == last_ticks. */
|
|
||||||
char prc3[] = "PRC h 1000 d t 600 200 (short) S 5 5\n";
|
|
||||||
process_line(prc3, s);
|
|
||||||
|
|
||||||
/* Header / separator / unknown label should be ignored. */
|
|
||||||
char header[] = "# comment line\n";
|
|
||||||
process_line(header, s);
|
|
||||||
char sep[] = "SEP\n";
|
|
||||||
process_line(sep, s);
|
|
||||||
char other[] = "CPU h 1000 d t 600 0 0 0 0 0 0 0 0\n";
|
|
||||||
process_line(other, s);
|
|
||||||
|
|
||||||
/* Malformed: pid <= 0. */
|
|
||||||
char bad_pid[] = "PRC h 1000 d t 600 0 (x) S 1 1\n";
|
|
||||||
process_line(bad_pid, s);
|
|
||||||
|
|
||||||
/* PRC short (<11 tokens) should not crash. */
|
|
||||||
char prc_short[] = "PRC h 1000 d t 600 300 (y) S 1\n";
|
|
||||||
process_line(prc_short, s);
|
|
||||||
|
|
||||||
/* PRM short (<12 tokens) should not crash. */
|
|
||||||
char prm_short[] = "PRM h 1000 d t 600 300 (y) S 4096 1 1 0\n";
|
|
||||||
process_line(prm_short, s);
|
|
||||||
|
|
||||||
/* Emit and sanity-check the output. */
|
|
||||||
char *buf = NULL;
|
|
||||||
size_t sz = 0;
|
|
||||||
FILE *out = open_memstream(&buf, &sz);
|
|
||||||
assert(out != NULL);
|
|
||||||
emit_results(s, out);
|
|
||||||
fclose(out);
|
|
||||||
CHECK(strstr(buf, "W\t1000\t1600\t2\t600\n") != NULL);
|
|
||||||
/* cc1: cpu delta 70 (pid 100 two samples) + 0 pids column via max(cpu,ram).
|
|
||||||
Peak RSS 4096, sum_avg 4096, rss_samples 1, pids max(1,1)=1. */
|
|
||||||
CHECK(strstr(buf, "N\tcc1\t70\t4096\t4096\t1\t1\n") != NULL);
|
|
||||||
/* short: single-sample pid 200 → delta == 10; no RAM, so peak/sum/rss=0. */
|
|
||||||
CHECK(strstr(buf, "N\tshort\t10\t0\t0\t0\t1\n") != NULL);
|
|
||||||
free(buf);
|
|
||||||
state_free(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_empty_and_single_epoch(void)
|
|
||||||
{
|
|
||||||
State *s = state_new();
|
|
||||||
/* No input at all → window line with zeroes. */
|
|
||||||
char *buf = NULL;
|
|
||||||
size_t sz = 0;
|
|
||||||
FILE *out = open_memstream(&buf, &sz);
|
|
||||||
emit_results(s, out);
|
|
||||||
fclose(out);
|
|
||||||
CHECK(strstr(buf, "W\t0\t0\t0\t0\n") != NULL);
|
|
||||||
free(buf);
|
|
||||||
state_free(s);
|
|
||||||
|
|
||||||
/* Exactly one epoch → median interval stays 0. */
|
|
||||||
s = state_new();
|
|
||||||
char prc[] = "PRC h 500 d t 600 50 (a) S 1 1\n";
|
|
||||||
process_line(prc, s);
|
|
||||||
buf = NULL;
|
|
||||||
sz = 0;
|
|
||||||
out = open_memstream(&buf, &sz);
|
|
||||||
emit_results(s, out);
|
|
||||||
fclose(out);
|
|
||||||
CHECK(strstr(buf, "W\t500\t500\t1\t0\n") != NULL);
|
|
||||||
free(buf);
|
|
||||||
state_free(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_delta_clamped_to_zero(void)
|
|
||||||
{
|
|
||||||
/* Counter reset: last < first → delta must clamp to 0. */
|
|
||||||
State *s = state_new();
|
|
||||||
char a[] = "PRC h 100 d t 600 77 (x) S 50 50\n";
|
|
||||||
char b[] = "PRC h 700 d t 600 77 (x) S 10 10\n";
|
|
||||||
process_line(a, s);
|
|
||||||
process_line(b, s);
|
|
||||||
char *buf = NULL;
|
|
||||||
size_t sz = 0;
|
|
||||||
FILE *out = open_memstream(&buf, &sz);
|
|
||||||
emit_results(s, out);
|
|
||||||
fclose(out);
|
|
||||||
CHECK(strstr(buf, "N\tx\t0\t") != NULL);
|
|
||||||
free(buf);
|
|
||||||
state_free(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_hash_collision(void)
|
|
||||||
{
|
|
||||||
/* Force two PIDs into adjacent slots (Knuth hash rarely collides on
|
|
||||||
small integers, but we sweep a range to exercise the linear-probe
|
|
||||||
branch). */
|
|
||||||
State *s = state_new();
|
|
||||||
for (int pid = 1; pid <= 2000; pid++)
|
|
||||||
{
|
|
||||||
char line[128];
|
|
||||||
snprintf(line, sizeof(line), "PRC h 1000 d t 600 %d (p) S 1 1\n", pid);
|
|
||||||
process_line(line, s);
|
|
||||||
snprintf(line, sizeof(line), "PRM h 1000 d t 600 %d (p) S 4096 1 1 0 0\n", pid);
|
|
||||||
process_line(line, s);
|
|
||||||
}
|
|
||||||
state_free(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_state_free_null(void)
|
|
||||||
{
|
|
||||||
/* Freeing NULL must be safe. */
|
|
||||||
state_free(NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(void)
|
|
||||||
{
|
|
||||||
test_copy_name();
|
|
||||||
test_tokenize();
|
|
||||||
test_process_and_emit();
|
|
||||||
test_empty_and_single_epoch();
|
|
||||||
test_delta_clamped_to_zero();
|
|
||||||
test_hash_collision();
|
|
||||||
test_state_free_null();
|
|
||||||
if (failures > 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "%d test failures\n", failures);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
printf("atop_agg tests: OK\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
File diff suppressed because one or more lines are too long
@ -1,40 +0,0 @@
|
|||||||
Checking 1dvelocitysimulator/main.c ...
|
|
||||||
1/20 files checked 2% done
|
|
||||||
Checking fps/main.c ...
|
|
||||||
2/20 files checked 10% done
|
|
||||||
Checking imageViewer/main.c ...
|
|
||||||
3/20 files checked 37% done
|
|
||||||
Checking lichess_random_engine/main.c ...
|
|
||||||
4/20 files checked 40% done
|
|
||||||
Checking lichess_random_engine/micro_max.c ...
|
|
||||||
5/20 files checked 49% done
|
|
||||||
Checking lichess_random_engine/movegen.c ...
|
|
||||||
6/20 files checked 60% done
|
|
||||||
Checking lichess_random_engine/perft.c ...
|
|
||||||
7/20 files checked 61% done
|
|
||||||
Checking lichess_random_engine/search.c ...
|
|
||||||
8/20 files checked 62% done
|
|
||||||
Checking misc/generatingWordsEndingWIthalka.c ...
|
|
||||||
9/20 files checked 63% done
|
|
||||||
Checking misc/randomJPG/generate_images.c ...
|
|
||||||
10/20 files checked 68% done
|
|
||||||
Checking misc/randomJPG/generate_jpg.c ...
|
|
||||||
11/20 files checked 73% done
|
|
||||||
Checking misc/split/main.c ...
|
|
||||||
12/20 files checked 74% done
|
|
||||||
Checking opening_learner/chess.c ...
|
|
||||||
13/20 files checked 83% done
|
|
||||||
Checking opening_learner/engine.c ...
|
|
||||||
14/20 files checked 86% done
|
|
||||||
Checking opening_learner/gui.c ...
|
|
||||||
15/20 files checked 90% done
|
|
||||||
Checking opening_learner/main.c ...
|
|
||||||
16/20 files checked 93% done
|
|
||||||
Checking opening_learner/mistakes.c ...
|
|
||||||
17/20 files checked 95% done
|
|
||||||
Checking scrapeWebsite/scrape.c ...
|
|
||||||
18/20 files checked 98% done
|
|
||||||
Checking tests/generatingPolishLettersOnWindowsTerminal.c ...
|
|
||||||
19/20 files checked 98% done
|
|
||||||
Checking websocketServer/main.c ...
|
|
||||||
20/20 files checked 100% done
|
|
||||||
627
C/flawfinder.txt
627
C/flawfinder.txt
@ -1,627 +0,0 @@
|
|||||||
Flawfinder version 2.0.19, (C) 2001-2019 David A. Wheeler.
|
|
||||||
Number of rules (primarily dangerous function names) in C/C++ ruleset: 222
|
|
||||||
./1dvelocitysimulator/main.c:16:5: [4] (shell) system:
|
|
||||||
This causes a new program to execute and is difficult to use safely
|
|
||||||
(CWE-78). try using a library call that implements the same functionality
|
|
||||||
if available.
|
|
||||||
./1dvelocitysimulator/main.c:22:5: [4] (shell) system:
|
|
||||||
This causes a new program to execute and is difficult to use safely
|
|
||||||
(CWE-78). try using a library call that implements the same functionality
|
|
||||||
if available.
|
|
||||||
./1dvelocitysimulator/main.c:27:5: [4] (shell) system:
|
|
||||||
This causes a new program to execute and is difficult to use safely
|
|
||||||
(CWE-78). try using a library call that implements the same functionality
|
|
||||||
if available.
|
|
||||||
./lichess_random_engine/movegen.c:35:20: [4] (buffer) strcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination [MS-banned]
|
|
||||||
(CWE-120). Consider using snprintf, strcpy_s, or strlcpy (warning: strncpy
|
|
||||||
easily misused).
|
|
||||||
./opening_learner/engine.c:21:9: [4] (shell) execlp:
|
|
||||||
This causes a new program to execute and is difficult to use safely
|
|
||||||
(CWE-78). try using a library call that implements the same functionality
|
|
||||||
if available.
|
|
||||||
./scrapeWebsite/scrape.c:49:8: [4] (race) access:
|
|
||||||
This usually indicates a security flaw. If an attacker can change anything
|
|
||||||
along the path between the call to access() and the file's actual use
|
|
||||||
(e.g., by moving files), the attacker can exploit the race condition
|
|
||||||
(CWE-362/CWE-367!). Set up the correct permissions (e.g., using setuid())
|
|
||||||
and try to open the file directly.
|
|
||||||
./fps/main.c:521:2: [3] (random) srand:
|
|
||||||
This function is not sufficiently random for security-related functions
|
|
||||||
such as key and nonce creation (CWE-327). Use a more secure technique for
|
|
||||||
acquiring random values.
|
|
||||||
./lichess_random_engine/main.c:112:2: [3] (random) srand:
|
|
||||||
This function is not sufficiently random for security-related functions
|
|
||||||
such as key and nonce creation (CWE-327). Use a more secure technique for
|
|
||||||
acquiring random values.
|
|
||||||
./lichess_random_engine/micro_max.c:228:52: [3] (random) srand:
|
|
||||||
This function is not sufficiently random for security-related functions
|
|
||||||
such as key and nonce creation (CWE-327). Use a more secure technique for
|
|
||||||
acquiring random values.
|
|
||||||
./misc/randomJPG/generate_images.c:257:5: [3] (random) srand:
|
|
||||||
This function is not sufficiently random for security-related functions
|
|
||||||
such as key and nonce creation (CWE-327). Use a more secure technique for
|
|
||||||
acquiring random values.
|
|
||||||
./misc/randomJPG/generate_jpg.c:208:5: [3] (random) srand:
|
|
||||||
This function is not sufficiently random for security-related functions
|
|
||||||
such as key and nonce creation (CWE-327). Use a more secure technique for
|
|
||||||
acquiring random values.
|
|
||||||
./opening_learner/main.c:49:2: [3] (random) srand:
|
|
||||||
This function is not sufficiently random for security-related functions
|
|
||||||
such as key and nonce creation (CWE-327). Use a more secure technique for
|
|
||||||
acquiring random values.
|
|
||||||
./fps/main.c:338:3: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:26:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:34:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:87:5: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./imageViewer/main.c:416:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:447:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:475:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:553:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:585:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:614:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:689:12: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:1137:9: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./imageViewer/main.c:1181:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:1188:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:1200:9: [2] (buffer) strcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination [MS-banned]
|
|
||||||
(CWE-120). Consider using snprintf, strcpy_s, or strlcpy (warning: strncpy
|
|
||||||
easily misused). Risk is low because the source is a constant string.
|
|
||||||
./imageViewer/main.c:1207:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./imageViewer/main.c:1208:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./lichess_random_engine/micro_max.c:15:6: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./lichess_random_engine/micro_max.c:179:6: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./lichess_random_engine/movegen.c:35:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./lichess_random_engine/movegen.c:36:5: [2] (buffer) strcat:
|
|
||||||
Does not check for buffer overflows when concatenating to destination
|
|
||||||
[MS-banned] (CWE-120). Consider using strcat_s, strncat, strlcat, or
|
|
||||||
snprintf (warning: strncat is easily misused). Risk is low because the
|
|
||||||
source is a constant string.
|
|
||||||
./lichess_random_engine/perft.c:38:21: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./lichess_random_engine/perft.c:46:17: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./lichess_random_engine/perft.c:53:36: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./misc/randomJPG/generate_images.c:106:21: [2] (misc) fopen:
|
|
||||||
Check when opening files - can an attacker redirect it (via symlinks),
|
|
||||||
force the opening of special file type (e.g., device files), move things
|
|
||||||
around to create a race condition, control its ancestors, or change its
|
|
||||||
contents? (CWE-362).
|
|
||||||
./misc/randomJPG/generate_images.c:117:21: [2] (misc) fopen:
|
|
||||||
Check when opening files - can an attacker redirect it (via symlinks),
|
|
||||||
force the opening of special file type (e.g., device files), move things
|
|
||||||
around to create a race condition, control its ancestors, or change its
|
|
||||||
contents? (CWE-362).
|
|
||||||
./misc/randomJPG/generate_images.c:121:14: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./misc/randomJPG/generate_images.c:124:14: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./misc/randomJPG/generate_images.c:163:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./misc/randomJPG/generate_images.c:234:33: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_images.c:235:27: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_images.c:236:33: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_images.c:237:30: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_images.c:273:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./misc/randomJPG/generate_jpg.c:106:21: [2] (misc) fopen:
|
|
||||||
Check when opening files - can an attacker redirect it (via symlinks),
|
|
||||||
force the opening of special file type (e.g., device files), move things
|
|
||||||
around to create a race condition, control its ancestors, or change its
|
|
||||||
contents? (CWE-362).
|
|
||||||
./misc/randomJPG/generate_jpg.c:124:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./misc/randomJPG/generate_jpg.c:186:33: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_jpg.c:187:27: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_jpg.c:188:33: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_jpg.c:189:30: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./misc/randomJPG/generate_jpg.c:224:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/chess.c:253:33: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/chess.c:270:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/chess.h:11:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/chess.h:48:33: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:36:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:82:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:88:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:90:41: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:92:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:104:31: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./opening_learner/engine.c:105:66: [2] (integer) atoi:
|
|
||||||
Unless checked, the resulting number can exceed the expected range
|
|
||||||
(CWE-190). If source untrusted, check both minimum and maximum, even if the
|
|
||||||
input had no minus sign (large numbers can roll over into negative number;
|
|
||||||
consider saving to an unsigned value if that is intended).
|
|
||||||
./opening_learner/engine.c:106:25: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:124:59: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:126:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.c:128:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.h:11:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/engine.h:32:59: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/gui.c:73:29: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/gui.h:24:29: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:29:2: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:36:38: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:77:2: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:79:2: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:83:2: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:95:4: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:99:4: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:103:6: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./opening_learner/main.c:136:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:155:4: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/main.c:164:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.c:32:15: [2] (misc) fopen:
|
|
||||||
Check when opening files - can an attacker redirect it (via symlinks),
|
|
||||||
force the opening of special file type (e.g., device files), move things
|
|
||||||
around to create a race condition, control its ancestors, or change its
|
|
||||||
contents? (CWE-362).
|
|
||||||
./opening_learner/mistakes.c:42:15: [2] (misc) fopen:
|
|
||||||
Check when opening files - can an attacker redirect it (via symlinks),
|
|
||||||
force the opening of special file type (e.g., device files), move things
|
|
||||||
around to create a race condition, control its ancestors, or change its
|
|
||||||
contents? (CWE-362).
|
|
||||||
./opening_learner/mistakes.c:44:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.c:44:21: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.c:44:41: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.c:44:61: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.c:49:13: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./opening_learner/mistakes.c:53:13: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./opening_learner/mistakes.c:57:13: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./opening_learner/mistakes.h:10:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.h:11:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./opening_learner/mistakes.h:13:5: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./scrapeWebsite/scrape.c:28:5: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./scrapeWebsite/scrape.c:56:20: [2] (misc) fopen:
|
|
||||||
Check when opening files - can an attacker redirect it (via symlinks),
|
|
||||||
force the opening of special file type (e.g., device files), move things
|
|
||||||
around to create a race condition, control its ancestors, or change its
|
|
||||||
contents? (CWE-362).
|
|
||||||
./websocketServer/main.c:22:22: [2] (buffer) char:
|
|
||||||
Statically-sized arrays can be improperly restricted, leading to potential
|
|
||||||
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
|
|
||||||
functions that limit length, or ensure that the size is larger than the
|
|
||||||
maximum possible length.
|
|
||||||
./websocketServer/main.c:24:13: [2] (buffer) memcpy:
|
|
||||||
Does not check for buffer overflows when copying to destination (CWE-120).
|
|
||||||
Make sure destination can always hold the source data.
|
|
||||||
./fps/main.c:345:22: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./fps/main.c:346:22: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./fps/main.c:347:22: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:233:27: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:404:27: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:453:49: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:455:43: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:476:31: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:477:31: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:493:33: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:494:33: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:592:49: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:594:43: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:615:31: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:616:31: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:632:33: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:633:33: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:1182:18: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./imageViewer/main.c:1191:23: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./lichess_random_engine/micro_max.c:163:18: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./lichess_random_engine/micro_max.c:241:11: [1] (buffer) strncpy:
|
|
||||||
Easily used incorrectly; doesn't always \0-terminate or check for invalid
|
|
||||||
pointers [MS-banned] (CWE-120).
|
|
||||||
./lichess_random_engine/movegen.c:428:18: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./lichess_random_engine/movegen.c:439:25: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./opening_learner/chess.c:261:15: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./opening_learner/engine.c:38:9: [1] (obsolete) usleep:
|
|
||||||
This C routine is considered obsolete (as opposed to the shell command by
|
|
||||||
the same name). The interaction of this function with SIGALRM and other
|
|
||||||
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
|
|
||||||
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
|
|
||||||
./opening_learner/engine.c:39:21: [1] (buffer) read:
|
|
||||||
Check buffer boundaries if used in a loop including recursive loops
|
|
||||||
(CWE-120, CWE-20).
|
|
||||||
./opening_learner/engine.c:49:9: [1] (obsolete) usleep:
|
|
||||||
This C routine is considered obsolete (as opposed to the shell command by
|
|
||||||
the same name). The interaction of this function with SIGALRM and other
|
|
||||||
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
|
|
||||||
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
|
|
||||||
./opening_learner/engine.c:50:21: [1] (buffer) read:
|
|
||||||
Check buffer boundaries if used in a loop including recursive loops
|
|
||||||
(CWE-120, CWE-20).
|
|
||||||
./opening_learner/engine.c:72:18: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
./opening_learner/engine.c:94:9: [1] (obsolete) usleep:
|
|
||||||
This C routine is considered obsolete (as opposed to the shell command by
|
|
||||||
the same name). The interaction of this function with SIGALRM and other
|
|
||||||
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
|
|
||||||
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
|
|
||||||
./opening_learner/engine.c:95:17: [1] (buffer) read:
|
|
||||||
Check buffer boundaries if used in a loop including recursive loops
|
|
||||||
(CWE-120, CWE-20).
|
|
||||||
./opening_learner/engine.c:107:25: [1] (buffer) sscanf:
|
|
||||||
It's unclear if the %s limit in the format string is small enough
|
|
||||||
(CWE-120). Check that the limit is sufficiently small, or use a different
|
|
||||||
input function.
|
|
||||||
./opening_learner/engine.c:130:9: [1] (obsolete) usleep:
|
|
||||||
This C routine is considered obsolete (as opposed to the shell command by
|
|
||||||
the same name). The interaction of this function with SIGALRM and other
|
|
||||||
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
|
|
||||||
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
|
|
||||||
./opening_learner/engine.c:131:17: [1] (buffer) read:
|
|
||||||
Check buffer boundaries if used in a loop including recursive loops
|
|
||||||
(CWE-120, CWE-20).
|
|
||||||
./opening_learner/engine.c:136:52: [1] (buffer) sscanf:
|
|
||||||
It's unclear if the %s limit in the format string is small enough
|
|
||||||
(CWE-120). Check that the limit is sufficiently small, or use a different
|
|
||||||
input function.
|
|
||||||
./opening_learner/main.c:23:15: [1] (buffer) strncat:
|
|
||||||
Easily used incorrectly (e.g., incorrectly computing the correct maximum
|
|
||||||
size to add) [MS-banned] (CWE-120). Consider strcat_s, strlcat, snprintf,
|
|
||||||
or automatically resizing strings. Risk is low because the source is a
|
|
||||||
constant character.
|
|
||||||
./opening_learner/main.c:24:2: [1] (buffer) strncat:
|
|
||||||
Easily used incorrectly (e.g., incorrectly computing the correct maximum
|
|
||||||
size to add) [MS-banned] (CWE-120). Consider strcat_s, strlcat, snprintf,
|
|
||||||
or automatically resizing strings.
|
|
||||||
./opening_learner/main.c:36:73: [1] (buffer) strncpy:
|
|
||||||
Easily used incorrectly; doesn't always \0-terminate or check for invalid
|
|
||||||
pointers [MS-banned] (CWE-120).
|
|
||||||
./opening_learner/main.c:100:30: [1] (buffer) strncpy:
|
|
||||||
Easily used incorrectly; doesn't always \0-terminate or check for invalid
|
|
||||||
pointers [MS-banned] (CWE-120).
|
|
||||||
./opening_learner/main.c:140:5: [1] (buffer) strncpy:
|
|
||||||
Easily used incorrectly; doesn't always \0-terminate or check for invalid
|
|
||||||
pointers [MS-banned] (CWE-120).
|
|
||||||
./websocketServer/main.c:23:30: [1] (buffer) strlen:
|
|
||||||
Does not handle strings that are not \0-terminated; if given one it may
|
|
||||||
perform an over-read (it could cause a crash if unprotected) (CWE-126).
|
|
||||||
|
|
||||||
ANALYSIS SUMMARY:
|
|
||||||
|
|
||||||
Hits = 140
|
|
||||||
Lines analyzed = 5027 in approximately 0.26 seconds (19578 lines/second)
|
|
||||||
Physical Source Lines of Code (SLOC) = 4111
|
|
||||||
Hits@level = [0] 208 [1] 41 [2] 87 [3] 6 [4] 6 [5] 0
|
|
||||||
Hits@level+ = [0+] 348 [1+] 140 [2+] 99 [3+] 12 [4+] 6 [5+] 0
|
|
||||||
Hits/KSLOC@level+ = [0+] 84.6509 [1+] 34.055 [2+] 24.0817 [3+] 2.919 [4+] 1.4595 [5+] 0
|
|
||||||
Dot directories skipped = 1 (--followdotdir overrides)
|
|
||||||
Minimum risk level = 1
|
|
||||||
|
|
||||||
Not every hit is necessarily a security vulnerability.
|
|
||||||
You can inhibit a report by adding a comment in this form:
|
|
||||||
// flawfinder: ignore
|
|
||||||
Make *sure* it's a false positive!
|
|
||||||
You can use the option --neverignore to show these.
|
|
||||||
|
|
||||||
There may be other security vulnerabilities; review your code!
|
|
||||||
See 'Secure Programming HOWTO'
|
|
||||||
(https://dwheeler.com/secure-programs) for more information.
|
|
||||||
347
C/lint_all.sh
347
C/lint_all.sh
@ -1,347 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
# Lint all C code in C/ and its subfolders with aggressive rules
|
|
||||||
# - Installs required tools if missing (clang-tidy, clang-format, cppcheck, flawfinder)
|
|
||||||
# - Uses compile_commands.json if present for clang-tidy; otherwise uses sane defaults
|
|
||||||
# - Checks formatting with clang-format --dry-run --Werror
|
|
||||||
# - Runs cppcheck with exhaustive rules
|
|
||||||
# - Runs flawfinder for security issues
|
|
||||||
|
|
||||||
set -u
|
|
||||||
|
|
||||||
RED='\033[0;31m'
|
|
||||||
GREEN='\033[0;32m'
|
|
||||||
YELLOW='\033[1;33m'
|
|
||||||
BLUE='\033[0;34m'
|
|
||||||
NC='\033[0m'
|
|
||||||
|
|
||||||
# Console logging helpers. Each one prints a colored status marker followed by
# all of its arguments joined with spaces, on stdout.

# Progress / section announcement (blue arrow).
info() {
    printf '%b\n' "${BLUE}==>${NC} $*"
}

# Successful step (green check mark).
ok() {
    printf '%b\n' "${GREEN}✓${NC} $*"
}

# Non-fatal problem (yellow warning sign).
warn() {
    printf '%b\n' "${YELLOW}⚠${NC} $*"
}

# Failure (red cross).
err() {
    printf '%b\n' "${RED}✗${NC} $*"
}
|
|
||||||
|
|
||||||
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
|
|
||||||
C_DIR="${ROOT_DIR}/C"
|
|
||||||
AUTOFIX=${LINT_AUTOFIX:-1}
|
|
||||||
|
|
||||||
if [[ ! -d "${C_DIR}" ]]; then
|
|
||||||
err "C directory not found at ${C_DIR}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
ISSUES=0
|
|
||||||
MISSING=()
|
|
||||||
C_FILES=()
|
|
||||||
C_SOURCES=()
|
|
||||||
|
|
||||||
# Record a required command as missing.
#   $1 - command name to resolve with `command -v`.
# When the command cannot be resolved its name is appended to the global
# MISSING array; nothing is printed.
need_cmd() {
    local tool="$1"
    if ! command -v "$tool" >/dev/null 2>&1; then
        MISSING+=("$tool")
    fi
}
|
|
||||||
|
|
||||||
# Print the name of the first supported package manager found on PATH.
# Probe order: pacman, apt-get, apt, dnf, zypper, apk. Prints "none" when no
# candidate is available.
detect_pkg_manager() {
    local candidate
    for candidate in pacman apt-get apt dnf zypper apk; do
        if command -v "$candidate" >/dev/null 2>&1; then
            # apt-get and apt are both reported under the single name "apt".
            case "$candidate" in
                apt-get) echo apt ;;
                *) echo "$candidate" ;;
            esac
            return
        fi
    done
    echo none
}
|
|
||||||
|
|
||||||
# Make sure clang-tidy, clang-format, cppcheck and flawfinder are available.
# Missing tools are collected in the global MISSING array (via need_cmd). If
# any are missing, one install attempt is made through sudo with the detected
# package manager, and the tools are then probed again. Install failures are
# tolerated (`|| true`); the function only warns when tools are still missing.
install_tools() {
    local required=(clang-tidy clang-format cppcheck flawfinder)
    local tool pm

    info "Checking required tools..."
    for tool in "${required[@]}"; do
        need_cmd "$tool"
    done

    if [[ ${#MISSING[@]} -eq 0 ]]; then
        ok "All tools present: clang-tidy, clang-format, cppcheck, flawfinder"
        return 0
    fi

    warn "Missing tools: ${MISSING[*]} — attempting to install with sudo"
    pm=$(detect_pkg_manager)
    case "$pm" in
        pacman)
            sudo pacman -S --needed --noconfirm clang clang-tools-extra clang-format cppcheck flawfinder || true
            ;;
        apt|apt-get)
            sudo "$pm" update -y || true
            # clang-tidy and clang-format may be versioned; the unversioned
            # meta packages are requested here.
            sudo "$pm" install -y clang-tidy clang-format cppcheck flawfinder || true
            ;;
        dnf)
            sudo dnf install -y clang-tools-extra clang cppcheck flawfinder || true
            ;;
        zypper)
            sudo zypper --non-interactive install clang-tools clang-tools-extra cppcheck flawfinder || true
            ;;
        apk)
            sudo apk add clang-extra-tools clang cppcheck flawfinder || true
            ;;
        *)
            warn "Unsupported package manager. Please install: clang-tidy clang-format cppcheck flawfinder"
            ;;
    esac

    # Probe again from a clean slate to see what the install attempt achieved.
    MISSING=()
    for tool in "${required[@]}"; do
        need_cmd "$tool"
    done

    if [[ ${#MISSING[@]} -eq 0 ]]; then
        ok "Tools installed"
    else
        warn "Still missing: ${MISSING[*]} — continuing, but related steps may be skipped"
    fi
}
|
|
||||||
|
|
||||||
# Create default lint configuration files in ${C_DIR} when they are absent.
# Existing .clang-tidy / .clang-format files are never overwritten.
#
# The generated files only use keys the tools accept:
#   - .clang-tidy does not set AnalyzeTemporaryDtors; the key was removed from
#     clang-tidy and newer releases reject a config that still carries it.
#   - .clang-format does not set `Standard`; that option only takes C++
#     standards (or Latest/Auto), so a value of C23 makes clang-format reject
#     the whole file and every formatting step fails.
ensure_configs() {
    if [[ ! -f "${C_DIR}/.clang-tidy" ]]; then
        warn ".clang-tidy not found in C/. Creating a default aggressive config."
        cat >"${C_DIR}/.clang-tidy" <<'YAML'
Checks: >
  clang-analyzer-*,bugprone-*,cert-*,concurrency-*,hicpp-*,misc-*,performance-*,
  portability-*,readability-*,clang-diagnostic-*,cppcoreguidelines-*
WarningsAsErrors: '*'
HeaderFilterRegex: '.*'
FormatStyle: none
YAML
    fi

    if [[ ! -f "${C_DIR}/.clang-format" ]]; then
        warn ".clang-format not found in C/. Creating a default style."
        cat >"${C_DIR}/.clang-format" <<'YAML'
BasedOnStyle: LLVM
IndentWidth: 4
TabWidth: 4
UseTab: Never
ColumnLimit: 100
SortIncludes: true
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: true
AllowShortIfStatementsOnASingleLine: false
BreakBeforeBraces: Allman
YAML
    fi
}
|
|
||||||
|
|
||||||
# Populate the global file lists used by the lint steps.
#   C_FILES   - every *.c, *.h and *.inc file under ${C_DIR}
#   C_SOURCES - only the *.c files under ${C_DIR}
# Hidden paths and build/dist/out/bin/obj directories are excluded from both.
#
# Both lists are read with mapfile, one path per line, so paths containing
# spaces or glob characters stay intact. (Building the array from an unquoted
# $(find ...) would word-split and glob-expand them.)
collect_files() {
    # Shared exclusion predicates so the two find calls cannot drift apart.
    local prune=(
        -not -path '*/.*' -not -path '*/build/*' -not -path '*/dist/*'
        -not -path '*/out/*' -not -path '*/bin/*' -not -path '*/obj/*'
    )

    mapfile -t C_FILES < <(find "${C_DIR}" -type f \
        \( -name '*.c' -o -name '*.h' -o -name '*.inc' \) "${prune[@]}")

    if [[ ${#C_FILES[@]} -eq 0 ]]; then
        warn "No C files found under ${C_DIR}"
    else
        ok "Found ${#C_FILES[@]} C-related files to check"
    fi

    mapfile -t C_SOURCES < <(find "${C_DIR}" -type f -name '*.c' "${prune[@]}")
}
|
|
||||||
|
|
||||||
# Reformat every file listed in C_FILES in place with `clang-format -i`.
# Does nothing (beyond a warning) when clang-format is not installed, and
# returns silently when C_FILES is empty. clang-format's stderr is discarded;
# only files it exits successfully on are counted in the final message.
apply_clang_format_fix() {
    command -v clang-format >/dev/null 2>&1 || {
        warn "clang-format unavailable; skipping auto-format"
        return
    }

    # Nothing collected, nothing to format.
    [[ ${#C_FILES[@]} -ne 0 ]] || return 0

    info "Applying clang-format -i to source files"
    local reformatted=0
    for f in "${C_FILES[@]}"; do
        clang-format -i "$f" 2>/dev/null || continue
        reformatted=$((reformatted + 1))
    done
    ok "clang-format applied to ${reformatted} file(s)"
}
|
|
||||||
|
|
||||||
# Run `clang-tidy --fix` over every file in C_SOURCES.
# When ${C_DIR}/compile_commands.json exists and its first line contains '[',
# clang-tidy is pointed at that directory with -p; otherwise fallback compiler
# flags (-std=c2x plus include paths for the file's directory and C_DIR) are
# passed after `--`. clang-tidy output is discarded; only the number of
# non-zero exits is reported.
apply_clang_tidy_fix() {
    command -v clang-tidy >/dev/null 2>&1 || {
        warn "clang-tidy unavailable; skipping auto-fix"
        return
    }

    [[ ${#C_SOURCES[@]} -ne 0 ]] || return 0

    local compile_db="${C_DIR}/compile_commands.json"
    local have_db="no"
    if [[ -f "$compile_db" ]] && head -n 1 "$compile_db" | grep -q '\['; then
        have_db="yes"
    fi

    info "Applying clang-tidy --fix to C sources"
    local failed=0
    local rel
    local -a tidy_args
    for f in "${C_SOURCES[@]}"; do
        # Show the path relative to the repo root; fall back to the raw path.
        rel=$(realpath --relative-to="${ROOT_DIR}" "$f" 2>/dev/null || echo "$f")
        printf ' • %s\n' "$rel"

        if [[ "$have_db" == "yes" ]]; then
            tidy_args=("$f" -p "${C_DIR}" --fix --format-style=file --quiet)
        else
            tidy_args=("$f" --fix --format-style=file --quiet -- -std=c2x -I"$(dirname "$f")" -I"${C_DIR}")
        fi

        clang-tidy "${tidy_args[@]}" >/dev/null 2>&1 || failed=$((failed + 1))
    done

    if [[ $failed -eq 0 ]]; then
        ok "clang-tidy auto-fix pass completed"
    else
        warn "clang-tidy auto-fix encountered $failed issue(s); manual review may be required"
    fi
}
|
|
||||||
|
|
||||||
# Run the automatic fixers unless they were switched off.
# AUTOFIX equal to "0" disables them; any other value runs clang-format and
# clang-tidy fixes and then rebuilds the file lists.
apply_autofix() {
    case "$AUTOFIX" in
        0)
            info "Automatic fixes disabled (LINT_AUTOFIX=0)"
            return
            ;;
    esac

    info "Automatic fixes enabled (LINT_AUTOFIX=${AUTOFIX})"
    apply_clang_format_fix
    apply_clang_tidy_fix
    # Re-scan: the fixers may have introduced new files.
    collect_files
}
|
|
||||||
|
|
||||||
# Check (without modifying) that every file in C_FILES is already formatted.
# Each file clang-format rejects under `--dry-run --Werror` is printed as
# "format issue: <path>" and added to the global ISSUES counter.
run_clang_format() {
    command -v clang-format >/dev/null 2>&1 || {
        warn "clang-format unavailable; skipping format check"
        return
    }

    info "Checking formatting with clang-format (--dry-run --Werror)"
    local unformatted=0
    for f in "${C_FILES[@]}"; do
        clang-format --dry-run --Werror "$f" >/dev/null 2>&1 && continue
        echo "format issue: $f"
        unformatted=$((unformatted + 1))
    done

    if [[ $unformatted -eq 0 ]]; then
        ok "Formatting OK"
    else
        warn "clang-format found $unformatted files needing formatting"
        ISSUES=$((ISSUES + unformatted))
    fi
}
|
|
||||||
|
|
||||||
# Run cppcheck recursively over ${C_DIR} with all checks enabled.
# cppcheck's stderr (where it writes findings) is captured in a temporary
# report file, printed indented afterwards, and then removed. A non-zero
# cppcheck exit adds one to the global ISSUES counter.
#
# Non-source directories are excluded with cppcheck's `-i <dir>` option.
# cppcheck has no `--exclude=` option; passing one makes it fail with an
# "unrecognized command line option" error before analysing anything.
run_cppcheck() {
    if ! command -v cppcheck >/dev/null 2>&1; then
        warn "cppcheck unavailable; skipping"
        return
    fi

    info "Running cppcheck (aggressive, recursive)"

    # Findings go to a temp file so they can be printed as one indented block.
    local report
    report=$(mktemp)

    local opts=(--enable=all --inconclusive --std=c23 --check-level=exhaustive --force
        --quiet --error-exitcode=2 --inline-suppr --suppress=missingIncludeSystem
        --library=posix)

    # Ignore common non-source directories.
    local skipped
    for skipped in build dist out .git bin obj; do
        opts+=(-i "$skipped")
    done

    if ! cppcheck "${opts[@]}" "${C_DIR}" 2>"$report"; then
        warn "cppcheck reported issues (see summary below)"
        ISSUES=$((ISSUES + 1))
    else
        ok "cppcheck passed"
    fi

    if [[ -s "$report" ]]; then
        echo
        echo "cppcheck output:" && sed -e 's/^/ /' "$report"
    fi
    rm -f "$report"
}
|
|
||||||
|
|
||||||
# Run clang-tidy (report only, no fixes) on every file in C_SOURCES.
# Uses ${C_DIR}/compile_commands.json through -p when that file exists and
# passes a minimal sanity check; otherwise falls back to explicit compiler
# flags. Each file clang-tidy exits non-zero on adds one to ISSUES.
run_clang_tidy() {
    command -v clang-tidy >/dev/null 2>&1 || {
        warn "clang-tidy unavailable; skipping"
        return
    }

    info "Running clang-tidy on .c files"
    local compile_db="${C_DIR}/compile_commands.json"
    local have_db="no"

    if [[ ${#C_SOURCES[@]} -eq 0 ]]; then
        warn "No .c files for clang-tidy"
        return
    fi

    if [[ -f "$compile_db" ]]; then
        # Minimal validation only: the first line must contain '['.
        if head -n 1 "$compile_db" | grep -q '\['; then
            have_db="yes"
        else
            warn "compile_commands.json seems malformed; ignoring"
        fi
    fi

    local failed=0
    local -a tidy_args
    for f in "${C_SOURCES[@]}"; do
        if [[ "$have_db" == "yes" ]]; then
            tidy_args=("$f" -p "${C_DIR}" --quiet)
        else
            # Fallback flags: C2x plus the file's own directory and C_DIR as
            # include paths.
            tidy_args=("$f" --quiet -- -std=c2x -I"$(dirname "$f")" -I"${C_DIR}")
        fi
        clang-tidy "${tidy_args[@]}" || failed=$((failed + 1))
    done

    if [[ $failed -eq 0 ]]; then
        ok "clang-tidy passed"
    else
        warn "clang-tidy found issues in $failed file(s)"
        ISSUES=$((ISSUES + failed))
    fi
}
|
|
||||||
|
|
||||||
# Run flawfinder over ${C_DIR} and print the first 200 lines of its report.
# A non-zero flawfinder exit adds one to the global ISSUES counter.
#
# --error-level=1 is required for that to ever happen: without an error level
# flawfinder exits 0 even when it reports hits, so the "reported issues"
# branch was unreachable and findings never affected the final exit status.
# The level matches --minlevel so every reported hit counts.
run_flawfinder() {
    if ! command -v flawfinder >/dev/null 2>&1; then
        warn "flawfinder unavailable; skipping"
        return
    fi

    info "Running flawfinder (security-focused scan)"

    local report
    report=$(mktemp)

    if ! flawfinder --quiet --columns --minlevel=1 --error-level=1 --falsepositive \
        "${C_DIR}" >"$report" 2>/dev/null; then
        warn "flawfinder reported issues"
        ISSUES=$((ISSUES + 1))
    else
        ok "flawfinder completed"
    fi

    if [[ -s "$report" ]]; then
        echo
        echo "flawfinder notable findings:" && head -n 200 "$report" | sed -e 's/^/ /'
    fi
    rm -f "$report"
}
|
|
||||||
|
|
||||||
# Print the final verdict. Exits the script with status 1 when the global
# ISSUES counter is above zero; otherwise reports success and returns.
summary_exit() {
    echo
    if [[ $ISSUES -le 0 ]]; then
        ok "All checks passed with no issues"
        return
    fi

    err "Lint completed with $ISSUES issue(s) detected"
    echo "Tip: run 'clang-format -i' to fix formatting; many clang-tidy checks support '--fix'"
    exit 1
}
|
|
||||||
|
|
||||||
# Entry point: print the banner, then run every stage of the lint suite in a
# fixed order. summary_exit runs last and decides the exit status.
main() {
    echo -e "${BLUE}C folder – aggressive lint suite${NC}"
    echo

    local stage
    for stage in \
        install_tools ensure_configs collect_files apply_autofix \
        run_clang_format run_cppcheck run_clang_tidy run_flawfinder \
        summary_exit; do
        "$stage"
    done
}
|
|
||||||
|
|
||||||
main "$@"
|
|
||||||
23
README.md
23
README.md
@ -1,6 +1,6 @@
|
|||||||
# testsAndMisc
|
# testsAndMisc
|
||||||
|
|
||||||
A collection of personal projects, scripts, and experiments — from a GPS-based phone focus tool to C/C++ demos, with CI, linting, and pre-commit hooks across the board.
|
A collection of personal projects, scripts, and experiments — from a GPS-based phone focus tool to Linux/Arch automation, with CI, linting, and pre-commit hooks across the board.
|
||||||
|
|
||||||
## Highlights
|
## Highlights
|
||||||
|
|
||||||
@ -14,21 +14,26 @@ Automated Arch Linux setup: fresh-install scripts, i3 window manager config, LaT
|
|||||||
|
|
||||||
### [Scripts](scripts/)
|
### [Scripts](scripts/)
|
||||||
|
|
||||||
Utility scripts for development workflows — C/C++ build file validation, secret detection, and custom makepkg helpers.
|
Utility scripts for development workflows — build file validation, secret detection, and custom makepkg helpers.
|
||||||
|
|
||||||
## Other Projects
|
## Repository Layout
|
||||||
|
|
||||||
| Directory | Description |
|
| Path | Description |
|
||||||
| ------------- | ------------------------ |
|
| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `Bash/` | FFmpeg build scripts |
|
| `python_pkg/` | Python packages (each maintained subpackage lives here) |
|
||||||
| `C/` | Small native helpers |
|
| `linux_configuration/` | Arch Linux setup, i3 config, system maintenance scripts |
|
||||||
| `python_pkg/` | Python package structure |
|
| `phone_focus_mode/` | GPS-based Android focus enforcer |
|
||||||
|
| `scripts/` | Workspace-level helper scripts and pre-commit hooks |
|
||||||
|
| `docs/` | Reference docs and historical reports |
|
||||||
|
| `third_party/` | Vendored upstream skills/agents |
|
||||||
|
| `meta/` | Repo-wide tooling: `pyproject.toml`, `requirements.txt`, `.pre-commit-config.yaml`, `run.sh`, `lint_python.sh`, `.fvmrc`. Symlinked into the repo root so tools that auto-discover from root keep working. |
|
||||||
|
|
||||||
Archived / unmaintained projects live in the sibling repository
|
Archived / unmaintained projects live in the sibling repository
|
||||||
[`testsAndMisc-archive`](https://github.com/kuhyx/testsAndMisc-archive).
|
[`testsAndMisc-archive`](https://github.com/kuhyx/testsAndMisc-archive).
|
||||||
|
|
||||||
## Tooling
|
## Tooling
|
||||||
|
|
||||||
- **Python linting**: [Ruff](https://docs.astral.sh/ruff/) with all rules enabled (see `pyproject.toml`)
|
- **Python linting**: [Ruff](https://docs.astral.sh/ruff/) with all rules enabled (see `meta/pyproject.toml`)
|
||||||
|
- **Dependencies**: `pip install -r meta/requirements.txt` (combined runtime + dev)
|
||||||
- **CI**: GitHub Actions — lint, build, and test on push
|
- **CI**: GitHub Actions — lint, build, and test on push
|
||||||
- **Testing**: pytest (Python), custom shell-based test harness for scripts
|
- **Testing**: pytest (Python), custom shell-based test harness for scripts
|
||||||
|
|||||||
50
docs/superpowers/evidence/cleanup-meta-folder-2026-05.json
Normal file
50
docs/superpowers/evidence/cleanup-meta-folder-2026-05.json
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
{
|
||||||
|
"intent": "Consolidate repo-wide tooling configs into a meta/ folder, drop unused C/ and a few stale python_pkg subpackages, combine requirements files, and remove setup.sh + .binary-allowlist. Root-level symlinks keep tool auto-discovery (pyproject.toml, .pre-commit-config.yaml, requirements.txt, run.sh, lint_python.sh, .fvmrc) working unchanged.",
|
||||||
|
"scope": [
|
||||||
|
"Delete: setup.sh, .binary-allowlist, C/, python_pkg/{split,pdfCentered,geo_data}, scripts/check_c_cpp_build_files.sh",
|
||||||
|
"Move into meta/: run.sh, lint_python.sh, pyproject.toml, .pre-commit-config.yaml, .fvmrc",
|
||||||
|
"Combine requirements.txt + requirements-dev.txt into meta/requirements.txt",
|
||||||
|
"Add root symlinks for each moved file so existing tooling keeps resolving from repo root",
|
||||||
|
"Update README.md and meta/.pre-commit-config.yaml to drop archived path references",
|
||||||
|
"Add .secret-patterns to .gitignore and untrack it (its content is sensitive home-coordinate regex)"
|
||||||
|
],
|
||||||
|
"changes": [
|
||||||
|
"git rm of removed paths and stale combined requirements files",
|
||||||
|
"git mv of five root configs into meta/ preserving git history",
|
||||||
|
"Created meta/requirements.txt as the single source of truth (alphabetically sorted, pre-commit-friendly)",
|
||||||
|
"Created six root symlinks pointing into meta/",
|
||||||
|
"Removed clang-format/cppcheck/flawfinder/check-c-cpp-build-files hooks (no C/C++ code remains)",
|
||||||
|
"Edited check-json, check_polling, codespell, gitleaks-style excludes to drop C/ and CPP/ references",
|
||||||
|
"Rewrote README.md repository layout section to mention meta/ and drop C/ + Bash/ rows"
|
||||||
|
],
|
||||||
|
"verification": [
|
||||||
|
{
|
||||||
|
"command": "ls -la run.sh pyproject.toml .pre-commit-config.yaml requirements.txt lint_python.sh .fvmrc",
|
||||||
|
"result": "pass",
|
||||||
|
"evidence": "All resolve as symlinks into meta/ with correct targets"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"command": "python -c 'import tomllib; tomllib.load(open(\"pyproject.toml\",\"rb\"))'",
|
||||||
|
"result": "pass",
|
||||||
|
"evidence": "tomllib parses pyproject.toml through the root symlink"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"command": "pre-commit validate-config",
|
||||||
|
"result": "pass",
|
||||||
|
"evidence": "Config validates after path edits"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"command": "pre-commit run --hook-stage pre-commit",
|
||||||
|
"result": "pending",
|
||||||
|
"evidence": "Final run is to be executed after this artifact is added"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"risks": [
|
||||||
|
"Anything that hard-coded the legacy path requirements-dev.txt or C/ breaks. No internal callers found via git grep.",
|
||||||
|
"If a tool resolves symlinks (rare) it might log the meta/ path instead of root; harmless."
|
||||||
|
],
|
||||||
|
"rollback": [
|
||||||
|
"git revert the consolidation commit; symlinks and moved files restore atomically because git tracks them.",
|
||||||
|
"Run pre-commit run --all-files after revert to confirm legacy layout still validates."
|
||||||
|
]
|
||||||
|
}
|
||||||
346
lint_python.sh
346
lint_python.sh
@ -1,346 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# ==============================================================================
|
|
||||||
# Python Linting Script - Run ALL linters with aggressive settings
|
|
||||||
# ==============================================================================
|
|
||||||
# Usage:
|
|
||||||
# ./lint_python.sh # Lint all Python files
|
|
||||||
# ./lint_python.sh --fix # Lint and auto-fix where possible
|
|
||||||
# ./lint_python.sh <file.py> # Lint specific file
|
|
||||||
# ./lint_python.sh --quick # Quick lint (ruff + mypy only)
|
|
||||||
# ./lint_python.sh --report # Generate detailed reports
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# Colors for output
|
|
||||||
RED='\033[0;31m'
|
|
||||||
GREEN='\033[0;32m'
|
|
||||||
YELLOW='\033[1;33m'
|
|
||||||
BLUE='\033[0;34m'
|
|
||||||
MAGENTA='\033[0;35m'
|
|
||||||
CYAN='\033[0;36m'
|
|
||||||
NC='\033[0m' # No Color
|
|
||||||
BOLD='\033[1m'
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
PROJECT_ROOT="${SCRIPT_DIR}"
|
|
||||||
PYTHON_PATHS=(
|
|
||||||
"PYTHON"
|
|
||||||
"articles"
|
|
||||||
"poker-modifier-app"
|
|
||||||
"tests"
|
|
||||||
)
|
|
||||||
EXCLUDE_PATHS=(
|
|
||||||
".venv"
|
|
||||||
"__pycache__"
|
|
||||||
".git"
|
|
||||||
"Bash/ffmpeg-build"
|
|
||||||
".pytest_cache"
|
|
||||||
".ruff_cache"
|
|
||||||
".mypy_cache"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Build exclude pattern for find
|
|
||||||
EXCLUDE_PATTERN=""
|
|
||||||
for path in "${EXCLUDE_PATHS[@]}"; do
|
|
||||||
EXCLUDE_PATTERN="${EXCLUDE_PATTERN} -path '*/${path}/*' -prune -o"
|
|
||||||
done
|
|
||||||
|
|
||||||
# Parse arguments
|
|
||||||
FIX_MODE=false
|
|
||||||
QUICK_MODE=false
|
|
||||||
REPORT_MODE=false
|
|
||||||
TARGET_FILES=""
|
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
|
||||||
case $1 in
|
|
||||||
--fix|-f)
|
|
||||||
FIX_MODE=true
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--quick|-q)
|
|
||||||
QUICK_MODE=true
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--report|-r)
|
|
||||||
REPORT_MODE=true
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--help|-h)
|
|
||||||
echo "Usage: $0 [OPTIONS] [FILES...]"
|
|
||||||
echo ""
|
|
||||||
echo "Options:"
|
|
||||||
echo " --fix, -f Auto-fix issues where possible"
|
|
||||||
echo " --quick, -q Quick mode (ruff + mypy only)"
|
|
||||||
echo " --report, -r Generate detailed reports to ./lint-reports/"
|
|
||||||
echo " --help, -h Show this help message"
|
|
||||||
echo ""
|
|
||||||
echo "Examples:"
|
|
||||||
echo " $0 # Lint all Python files"
|
|
||||||
echo " $0 --fix # Lint and auto-fix"
|
|
||||||
echo " $0 PYTHON/ # Lint specific directory"
|
|
||||||
echo " $0 --quick --fix # Quick lint with auto-fix"
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
TARGET_FILES="${TARGET_FILES} $1"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# If no target specified, use default paths
|
|
||||||
if [[ -z "${TARGET_FILES}" ]]; then
|
|
||||||
TARGET_FILES="${PYTHON_PATHS[*]}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Create reports directory if needed
|
|
||||||
if [[ "${REPORT_MODE}" == true ]]; then
|
|
||||||
mkdir -p "${PROJECT_ROOT}/lint-reports"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Track overall status
|
|
||||||
OVERALL_STATUS=0
|
|
||||||
FAILED_TOOLS=()
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# Helper functions
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Print a top-level section banner: a blank line, then the title ($1) framed
# by two heavy horizontal rules, all in bold blue.
print_header() {
    local rule="══════════════════════════════════════════════════════════════"
    echo ""
    echo -e "${BOLD}${BLUE}${rule}${NC}"
    echo -e "${BOLD}${BLUE} $1${NC}"
    echo -e "${BOLD}${BLUE}${rule}${NC}"
}
|
|
||||||
|
|
||||||
# Print a per-tool sub-section banner: a blank line, then the title ($1)
# framed by two light horizontal rules, in cyan.
print_subheader() {
    local rule="──────────────────────────────────────────────────────────────"
    echo ""
    echo -e "${CYAN}${rule}${NC}"
    echo -e "${CYAN} $1${NC}"
    echo -e "${CYAN}${rule}${NC}"
}
|
|
||||||
|
|
||||||
# Print a success line: green check mark followed by the message in $1.
print_success() {
    local message="$1"
    printf '%b\n' "${GREEN}✓${NC} ${message}"
}
|
|
||||||
|
|
||||||
# Print a warning line: yellow warning sign followed by the message in $1.
print_warning() {
    local message="$1"
    printf '%b\n' "${YELLOW}⚠${NC} ${message}"
}
|
|
||||||
|
|
||||||
# Print an error line: red cross followed by the message in $1.
print_error() {
    local message="$1"
    printf '%b\n' "${RED}✗${NC} ${message}"
}
|
|
||||||
|
|
||||||
# Print an informational line: blue info sign followed by the message in $1.
print_info() {
    local message="$1"
    printf '%b\n' "${BLUE}ℹ${NC} ${message}"
}
|
|
||||||
|
|
||||||
# Run one lint tool and record the outcome.
#   $1 - display name of the tool (also used for the report file name)
#   $2 - full command line, executed with eval
# In report mode the combined stdout/stderr is also copied to
# ${PROJECT_ROOT}/lint-reports/<name>.txt via tee. On failure the tool name is
# appended to the global FAILED_TOOLS array.
# Returns 0 when the command succeeded, 1 otherwise.
run_tool() {
    local label="$1"
    local command_line="$2"
    local report_path="${PROJECT_ROOT}/lint-reports/${label}.txt"
    local failure_note="${label} found issues"
    local rc=0

    print_subheader "Running ${label}..."

    if [[ "${REPORT_MODE}" == true ]]; then
        failure_note="${failure_note} (see ${report_path})"
        eval "${command_line}" 2>&1 | tee "${report_path}" || rc=$?
    else
        eval "${command_line}" || rc=$?
    fi

    if [[ ${rc} -ne 0 ]]; then
        print_error "${failure_note}"
        FAILED_TOOLS+=("${label}")
        return 1
    fi

    print_success "${label} passed"
    return 0
}
|
|
||||||
|
|
||||||
# Test whether a tool is available.
#   $1 - command name to resolve with `command -v`
# Returns 0 when found. Otherwise prints a "not found, skipping..." warning
# and returns 1 so the caller can skip that tool.
check_tool() {
    local binary="$1"
    if ! command -v "${binary}" &> /dev/null; then
        print_warning "${binary} not found, skipping..."
        return 1
    fi
    return 0
}
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# Main linting process
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
print_header "Python Linting Suite - Aggressive Mode"
|
|
||||||
echo ""
|
|
||||||
print_info "Target: ${TARGET_FILES}"
|
|
||||||
print_info "Fix mode: ${FIX_MODE}"
|
|
||||||
print_info "Quick mode: ${QUICK_MODE}"
|
|
||||||
print_info "Report mode: ${REPORT_MODE}"
|
|
||||||
|
|
||||||
cd "${PROJECT_ROOT}"
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# RUFF - Primary linter and formatter
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool ruff; then
|
|
||||||
if [[ "${FIX_MODE}" == true ]]; then
|
|
||||||
run_tool "ruff-lint" "ruff check --fix --show-fixes ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
run_tool "ruff-format" "ruff format ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
else
|
|
||||||
run_tool "ruff-lint" "ruff check ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
run_tool "ruff-format-check" "ruff format --check ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# MYPY - Static type checking
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool mypy; then
|
|
||||||
run_tool "mypy" "mypy --strict --ignore-missing-imports ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Quick mode exits here
|
|
||||||
if [[ "${QUICK_MODE}" == true ]]; then
|
|
||||||
print_header "Quick Lint Complete"
|
|
||||||
if [[ ${#FAILED_TOOLS[@]} -gt 0 ]]; then
|
|
||||||
print_error "Failed tools: ${FAILED_TOOLS[*]}"
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
print_success "All quick checks passed!"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# PYLINT - Comprehensive linting
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool pylint; then
|
|
||||||
run_tool "pylint" "pylint --rcfile=pyproject.toml --jobs=0 --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# BANDIT - Security linting
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool bandit; then
|
|
||||||
run_tool "bandit" "bandit -c pyproject.toml -r ${TARGET_FILES} --severity-level low --confidence-level low" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# VULTURE - Dead code detection
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool vulture; then
|
|
||||||
run_tool "vulture" "vulture --min-confidence 80 ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# FLAKE8 - Traditional linter
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool flake8; then
|
|
||||||
run_tool "flake8" "flake8 --max-line-length=88 --extend-ignore=E203,W503 --max-complexity=10 ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# PYCODESTYLE - PEP 8 style checker
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool pycodestyle; then
|
|
||||||
run_tool "pycodestyle" "pycodestyle --max-line-length=88 --ignore=E203,W503 ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# PYDOCSTYLE - Docstring style checker
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool pydocstyle; then
|
|
||||||
run_tool "pydocstyle" "pydocstyle --convention=google ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# RADON - Complexity metrics
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool radon; then
|
|
||||||
print_subheader "Running radon (complexity analysis)..."
|
|
||||||
echo ""
|
|
||||||
echo -e "${MAGENTA}Cyclomatic Complexity:${NC}"
|
|
||||||
radon cc -a -s ${TARGET_FILES} || true
|
|
||||||
echo ""
|
|
||||||
echo -e "${MAGENTA}Maintainability Index:${NC}"
|
|
||||||
radon mi -s ${TARGET_FILES} || true
|
|
||||||
|
|
||||||
if [[ "${REPORT_MODE}" == true ]]; then
|
|
||||||
radon cc -a -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-cc.txt" 2>&1 || true
|
|
||||||
radon mi -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-mi.txt" 2>&1 || true
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# INTERROGATE - Docstring coverage
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool interrogate; then
|
|
||||||
run_tool "interrogate" "interrogate -v --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# PYRIGHT - Microsoft's type checker (optional, very strict)
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool pyright; then
|
|
||||||
run_tool "pyright" "pyright ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# AUTOFLAKE - Unused imports/variables (fix mode only)
|
|
||||||
# ==============================================================================
|
|
||||||
if [[ "${FIX_MODE}" == true ]] && check_tool autoflake; then
|
|
||||||
print_subheader "Running autoflake (removing unused imports)..."
|
|
||||||
find ${TARGET_FILES} -name "*.py" -type f -exec autoflake --in-place --remove-all-unused-imports --remove-unused-variables {} \;
|
|
||||||
print_success "autoflake completed"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# PYUPGRADE - Upgrade Python syntax (fix mode only)
|
|
||||||
# ==============================================================================
|
|
||||||
if [[ "${FIX_MODE}" == true ]] && check_tool pyupgrade; then
|
|
||||||
print_subheader "Running pyupgrade (upgrading syntax to Python 3.10+)..."
|
|
||||||
find ${TARGET_FILES} -name "*.py" -type f -exec pyupgrade --py310-plus {} \;
|
|
||||||
print_success "pyupgrade completed"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# CODESPELL - Spell checking
|
|
||||||
# ==============================================================================
|
|
||||||
if check_tool codespell; then
|
|
||||||
if [[ "${FIX_MODE}" == true ]]; then
|
|
||||||
run_tool "codespell" "codespell -w --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
else
|
|
||||||
run_tool "codespell" "codespell --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# Summary
|
|
||||||
# ==============================================================================
|
|
||||||
print_header "Linting Summary"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
if [[ ${OVERALL_STATUS} -ne 0 ]]; then
|
|
||||||
print_error "The following tools reported issues:"
|
|
||||||
for tool in "${FAILED_TOOLS[@]}"; do
|
|
||||||
echo " - ${tool}"
|
|
||||||
done
|
|
||||||
echo ""
|
|
||||||
if [[ "${REPORT_MODE}" == true ]]; then
|
|
||||||
print_info "Detailed reports saved to: ${PROJECT_ROOT}/lint-reports/"
|
|
||||||
fi
|
|
||||||
print_info "Run with --fix to auto-fix issues where possible"
|
|
||||||
exit ${OVERALL_STATUS}
|
|
||||||
else
|
|
||||||
print_success "All linting checks passed!"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
1
lint_python.sh
Symbolic link
1
lint_python.sh
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
meta/lint_python.sh
|
||||||
3
meta/.fvmrc
Normal file
3
meta/.fvmrc
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"flutter": "stable"
|
||||||
|
}
|
||||||
391
meta/.pre-commit-config.yaml
Normal file
391
meta/.pre-commit-config.yaml
Normal file
@ -0,0 +1,391 @@
|
|||||||
|
# ==============================================================================
|
||||||
|
# Pre-commit Configuration - Multi-language Linting & Formatting
|
||||||
|
# ==============================================================================
|
||||||
|
# Install: pre-commit install && pre-commit install --hook-type pre-push
|
||||||
|
# Fast lint: pre-commit run --all-files (linters only, ~10 s)
|
||||||
|
# Full suite: pre-commit run --all-files --hook-stage pre-push (+ tests)
|
||||||
|
# Update hooks: pre-commit autoupdate
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Global settings
|
||||||
|
default_language_version:
|
||||||
|
python: python3
|
||||||
|
|
||||||
|
# Fail fast on first error (set to false to see all errors)
|
||||||
|
fail_fast: false
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
ci:
|
||||||
|
autofix_commit_msg: "style: auto-fix by pre-commit hooks"
|
||||||
|
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
||||||
|
|
||||||
|
repos:
|
||||||
|
# ===========================================================================
|
||||||
|
# GENERAL HOOKS - File formatting and validation
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v4.6.0
|
||||||
|
hooks:
|
||||||
|
- id: trailing-whitespace
|
||||||
|
args: [--markdown-linebreak-ext=md]
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: check-yaml
|
||||||
|
args: [--unsafe]
|
||||||
|
- id: check-json
|
||||||
|
# Exclude JSONC files (VS Code configs, TypeScript configs)
|
||||||
|
exclude: ^(\.vscode/|.*/\.vscode/|.*tsconfig.*\.json)
|
||||||
|
- id: check-toml
|
||||||
|
- id: check-xml
|
||||||
|
- id: check-added-large-files
|
||||||
|
args: [--maxkb=2000]
|
||||||
|
- id: check-merge-conflict
|
||||||
|
- id: check-case-conflict
|
||||||
|
- id: check-symlinks
|
||||||
|
- id: check-executables-have-shebangs
|
||||||
|
- id: check-shebang-scripts-are-executable
|
||||||
|
- id: detect-private-key
|
||||||
|
- id: debug-statements
|
||||||
|
- id: name-tests-test
|
||||||
|
args: [--pytest-test-first]
|
||||||
|
- id: check-ast
|
||||||
|
- id: check-builtin-literals
|
||||||
|
- id: check-docstring-first
|
||||||
|
- id: fix-byte-order-marker
|
||||||
|
- id: mixed-line-ending
|
||||||
|
args: [--fix=lf]
|
||||||
|
- id: requirements-txt-fixer
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# BINARY BLOCKER - Prevent binary/image files from being committed
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: no-binaries
|
||||||
|
name: Block binary/image files
|
||||||
|
entry: scripts/check_no_binaries.sh
|
||||||
|
language: script
|
||||||
|
always_run: false
|
||||||
|
- id: ai-evidence-contract
|
||||||
|
name: Require AI evidence artifacts for code changes
|
||||||
|
entry: scripts/check_ai_evidence.sh
|
||||||
|
language: script
|
||||||
|
pass_filenames: false
|
||||||
|
always_run: true
|
||||||
|
- id: ai-multifile-contract
|
||||||
|
name: Require workflow contract for multi-file code changes
|
||||||
|
entry: scripts/check_agent_contract.sh
|
||||||
|
language: script
|
||||||
|
pass_filenames: false
|
||||||
|
always_run: true
|
||||||
|
- id: append-only-sessions
|
||||||
|
name: Enforce append-only session logs
|
||||||
|
entry: scripts/check_append_only_sessions.sh
|
||||||
|
language: script
|
||||||
|
pass_filenames: false
|
||||||
|
always_run: true
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# POLLING SCRIPT LINTER - Detect fork-storm anti-patterns in shell scripts
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: no-polling-antipatterns
|
||||||
|
name: Block polling script anti-patterns
|
||||||
|
entry: scripts/check_polling_antipatterns.sh
|
||||||
|
language: script
|
||||||
|
types: [shell]
|
||||||
|
exclude: ^(\.git/|phone_focus_mode/lib/tests/|tests/)
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# NOQA BLOCKER - Zero tolerance for noqa/type:ignore suppression comments
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: no-noqa
|
||||||
|
name: Block noqa comments
|
||||||
|
entry: '(?i)#\s*(noqa|type:\s*ignore)'
|
||||||
|
language: pygrep
|
||||||
|
types: [python]
|
||||||
|
- id: no-ruff-noqa
|
||||||
|
name: Block ruff noqa file-level comments
|
||||||
|
entry: '(?i)#\s*ruff:\s*noqa'
|
||||||
|
language: pygrep
|
||||||
|
types: [python]
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# RUFF - Fast Python linter and formatter (replaces black, isort, flake8, etc.)
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.15.2
|
||||||
|
hooks:
|
||||||
|
# Linter - run first to catch issues
|
||||||
|
- id: ruff
|
||||||
|
args:
|
||||||
|
- --fix
|
||||||
|
- --unsafe-fixes
|
||||||
|
- --exit-non-zero-on-fix
|
||||||
|
- --show-fixes
|
||||||
|
types_or: [python, pyi]
|
||||||
|
# Formatter - run after linting
|
||||||
|
- id: ruff-format
|
||||||
|
types_or: [python, pyi]
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# MYPY - Static type checking (runs on push only for speed)
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
|
rev: v1.13.0
|
||||||
|
hooks:
|
||||||
|
- id: mypy
|
||||||
|
stages: [pre-push]
|
||||||
|
args:
|
||||||
|
- --ignore-missing-imports
|
||||||
|
- --no-error-summary
|
||||||
|
- --disable-error-code=no-untyped-def
|
||||||
|
- --disable-error-code=no-untyped-call
|
||||||
|
- --disable-error-code=var-annotated
|
||||||
|
- --disable-error-code=no-any-unimported
|
||||||
|
- --disable-error-code=type-arg
|
||||||
|
- --disable-error-code=no-any-return
|
||||||
|
- --disable-error-code=misc
|
||||||
|
- --disable-error-code=unused-ignore
|
||||||
|
- --disable-error-code=unreachable
|
||||||
|
- --disable-error-code=assignment
|
||||||
|
- --disable-error-code=no-redef
|
||||||
|
- --disable-error-code=attr-defined
|
||||||
|
- --disable-error-code=arg-type
|
||||||
|
- --disable-error-code=union-attr
|
||||||
|
- --disable-error-code=call-overload
|
||||||
|
- --disable-error-code=return-value
|
||||||
|
- --disable-error-code=redundant-cast
|
||||||
|
- --disable-error-code=empty-body
|
||||||
|
- --disable-error-code=list-item
|
||||||
|
exclude: >-
|
||||||
|
(?x)^(
|
||||||
|
Bash/.*|
|
||||||
|
\.venv/.*|
|
||||||
|
linux_configuration/scripts/misc/testsAndMisc-bash/tools/.*
|
||||||
|
)$
|
||||||
|
additional_dependencies:
|
||||||
|
- types-requests
|
||||||
|
- types-PyYAML
|
||||||
|
- types-python-dateutil
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# PYLINT - Comprehensive Python linter (runs on push only for speed)
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/pylint-dev/pylint
|
||||||
|
rev: v3.3.2
|
||||||
|
hooks:
|
||||||
|
- id: pylint
|
||||||
|
stages: [pre-push]
|
||||||
|
args:
|
||||||
|
- --rcfile=pyproject.toml
|
||||||
|
- --fail-under=8.0
|
||||||
|
- --jobs=0
|
||||||
|
additional_dependencies:
|
||||||
|
- pytest
|
||||||
|
- python-chess
|
||||||
|
- requests
|
||||||
|
- pygame
|
||||||
|
exclude: ^(Bash/|\.venv/)
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# BANDIT - Security linter (runs on push only for speed)
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/PyCQA/bandit
|
||||||
|
rev: 1.7.10
|
||||||
|
hooks:
|
||||||
|
- id: bandit
|
||||||
|
stages: [pre-push]
|
||||||
|
args:
|
||||||
|
- -c
|
||||||
|
- pyproject.toml
|
||||||
|
- --severity-level=high
|
||||||
|
- --confidence-level=medium
|
||||||
|
- --skip=B113
|
||||||
|
additional_dependencies: ["bandit[toml]"]
|
||||||
|
exclude: ^(Bash/|\.venv/|tests/|.*test.*\.py$)
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# PYTEST + COVERAGE - Run tests and enforce 100% code coverage
|
||||||
|
# Only tests for subpackages with changed files are run (see script).
|
||||||
|
# Runs on push only (slow); use --hook-stage pre-push to run manually.
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: pytest-coverage
|
||||||
|
name: pytest with coverage enforcement
|
||||||
|
entry: python scripts/pytest_changed_packages.py
|
||||||
|
language: system
|
||||||
|
types: [python]
|
||||||
|
pass_filenames: true
|
||||||
|
stages: [pre-push]
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# VULTURE - Dead code detection (disabled - doesn't work well with pre-commit)
|
||||||
|
# ===========================================================================
|
||||||
|
# - repo: https://github.com/jendrikseipp/vulture
|
||||||
|
# rev: v2.13
|
||||||
|
# hooks:
|
||||||
|
# - id: vulture
|
||||||
|
# args:
|
||||||
|
# - --min-confidence=80
|
||||||
|
# - --exclude=.venv,Bash,__pycache__
|
||||||
|
# exclude: ^(Bash/|\.venv/)
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# PYUPGRADE - Upgrade Python syntax (disabled - incompatible with Python 3.14)
|
||||||
|
# ===========================================================================
|
||||||
|
# - repo: https://github.com/asottile/pyupgrade
|
||||||
|
# rev: v3.19.0
|
||||||
|
# hooks:
|
||||||
|
# - id: pyupgrade
|
||||||
|
# args:
|
||||||
|
# - --py310-plus
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# CODESPELL - Spell checking in code (expanded ignore list for non-English)
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/codespell-project/codespell
|
||||||
|
rev: v2.3.0
|
||||||
|
hooks:
|
||||||
|
- id: codespell
|
||||||
|
args:
|
||||||
|
- --skip=*.json,*.lock,*.min.js,*.min.css,.git,__pycache__,.venv,*.txt
|
||||||
|
- --ignore-words-list=als,ans,ect,nd,som,sur,te,nam,numer,lew,sie,wil,postion,clen,ther,folow,derrive,ony,tje,noe,theses,crate,doubleclick,wile,tabel,pary,blok,bloc,proces,serwer,parametr,adres,hart,dout,metod,tekst,synonim,grup,mosty,lokal,skalar,milion,nowe,tre,hel,alph
|
||||||
|
exclude: ^(Bash/ffmpeg-build/|LaTeX/|.*\.geojson$)
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# DOCFORMATTER - Format docstrings (disabled - causes recursion errors)
|
||||||
|
# ===========================================================================
|
||||||
|
# - repo: local
|
||||||
|
# hooks:
|
||||||
|
# - id: docformatter
|
||||||
|
# name: docformatter
|
||||||
|
# entry: docformatter
|
||||||
|
# language: system
|
||||||
|
# types: [python]
|
||||||
|
# args:
|
||||||
|
# - --in-place
|
||||||
|
# - --wrap-summaries=88
|
||||||
|
# - --wrap-descriptions=88
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# INTERROGATE - Docstring coverage (disabled - causes recursion on large files)
|
||||||
|
# ===========================================================================
|
||||||
|
# - repo: https://github.com/econchick/interrogate
|
||||||
|
# rev: 1.7.0
|
||||||
|
# hooks:
|
||||||
|
# - id: interrogate
|
||||||
|
# args:
|
||||||
|
# - --fail-under=0
|
||||||
|
# - --verbose
|
||||||
|
# - --ignore-init-method
|
||||||
|
# - --ignore-init-module
|
||||||
|
# - --ignore-magic
|
||||||
|
# - --ignore-private
|
||||||
|
# - --ignore-semiprivate
|
||||||
|
# - --exclude=Bash,.venv,__pycache__
|
||||||
|
# pass_filenames: false
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# AUTOFLAKE - Remove unused imports/variables
|
||||||
|
# Disabled: fully redundant with ruff (F401, F841, F811) + --fix
|
||||||
|
# ===========================================================================
|
||||||
|
# - repo: https://github.com/PyCQA/autoflake
|
||||||
|
# rev: v2.3.1
|
||||||
|
# hooks:
|
||||||
|
# - id: autoflake
|
||||||
|
# args:
|
||||||
|
# - --in-place
|
||||||
|
# - --remove-all-unused-imports
|
||||||
|
# - --remove-unused-variables
|
||||||
|
# - --remove-duplicate-keys
|
||||||
|
# - --expand-star-imports
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# SAFETY - Check for security vulnerabilities in dependencies
|
||||||
|
# ===========================================================================
|
||||||
|
# Note: Safety requires API key for full functionality, disabled by default
|
||||||
|
# - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
|
||||||
|
# rev: v1.3.2
|
||||||
|
# hooks:
|
||||||
|
# - id: python-safety-dependencies-check
|
||||||
|
# files: requirements.*\.txt$
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# PYRIGHT - Microsoft's type checker (very strict, optional)
|
||||||
|
# ===========================================================================
|
||||||
|
# Uncomment to enable - can be slow and very strict
|
||||||
|
# - repo: https://github.com/RobertCraigie/pyright-python
|
||||||
|
# rev: v1.1.350
|
||||||
|
# hooks:
|
||||||
|
# - id: pyright
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# CHECK JSON/YAML/TOML formatting (runs on push only — slow Node.js startup)
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||||
|
rev: v4.0.0-alpha.8
|
||||||
|
hooks:
|
||||||
|
- id: prettier
|
||||||
|
types_or: [yaml, json, markdown]
|
||||||
|
exclude: ^(Bash/|\.venv/|.*\.lock$)
|
||||||
|
stages: [pre-push]
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# SHELLCHECK - Shell script linting
|
||||||
|
# Wrapper batches files to avoid OOM on large repos.
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: shellcheck
|
||||||
|
name: shellcheck
|
||||||
|
entry: bash -c 'printf "%s\0" "$@" | xargs -0 -n 40 shellcheck --severity=warning' --
|
||||||
|
language: system
|
||||||
|
types: [shell]
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# CHECK PYTHON LOCATION - All Python files must be under python_pkg/
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: check-python-location
|
||||||
|
name: check Python files are under python_pkg/
|
||||||
|
entry: scripts/check_python_location.sh
|
||||||
|
language: script
|
||||||
|
types: [python]
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# REMOVE EMPTY DIRECTORIES - Clean up empty folders in the repo
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: remove-empty-dirs
|
||||||
|
name: remove empty directories
|
||||||
|
entry: find . -type d -empty -not -path './.git/*' -delete -print
|
||||||
|
language: system
|
||||||
|
pass_filenames: false
|
||||||
|
always_run: true
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# SECRET PATTERNS - Block commits containing sensitive data
|
||||||
|
# ===========================================================================
|
||||||
|
- repo: local
  hooks:
    - id: check-no-secrets
      name: check for leaked secrets
      entry: scripts/check_no_secrets.sh
      language: script
      # Files the scanner must skip. The pre-commit config is kept under
      # meta/ and exposed at the repository root through a symlink, so both
      # paths are listed; pre-commit matches this regex against
      # repo-relative paths.
      # NOTE(review): the original reason for skipping the config file is not
      # visible here - confirm the meta/ copy should be skipped as well.
      exclude: ^(\.secret-patterns|(meta/)?\.pre-commit-config\.yaml|.*\.geojson)$
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# COMMITIZEN - Conventional commits (optional)
|
||||||
|
# ===========================================================================
|
||||||
|
# - repo: https://github.com/commitizen-tools/commitizen
|
||||||
|
# rev: v3.13.0
|
||||||
|
# hooks:
|
||||||
|
# - id: commitizen
|
||||||
|
# - id: commitizen-branch
|
||||||
|
# stages: [pre-push]
|
||||||
346
meta/lint_python.sh
Executable file
346
meta/lint_python.sh
Executable file
@ -0,0 +1,346 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# ==============================================================================
|
||||||
|
# Python Linting Script - Run ALL linters with aggressive settings
|
||||||
|
# ==============================================================================
|
||||||
|
# Usage:
|
||||||
|
# ./lint_python.sh # Lint all Python files
|
||||||
|
# ./lint_python.sh --fix # Lint and auto-fix where possible
|
||||||
|
# ./lint_python.sh <file.py> # Lint specific file
|
||||||
|
# ./lint_python.sh --quick # Quick lint (ruff + mypy only)
|
||||||
|
# ./lint_python.sh --report # Generate detailed reports
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Colors for output (ANSI escape sequences, interpreted later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
BOLD='\033[1m'

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# This script is stored in meta/ and exposed at the repository root through a
# symlink. BASH_SOURCE is not symlink-resolved, so SCRIPT_DIR is the repository
# root when the symlink is used but meta/ when the script is run directly.
# Ask git for the work-tree root so both entry points lint the same tree and
# write reports to the same place; fall back to SCRIPT_DIR when git is missing
# or the script has been copied outside a work tree.
PROJECT_ROOT="$(git -C "${SCRIPT_DIR}" rev-parse --show-toplevel 2>/dev/null || printf '%s\n' "${SCRIPT_DIR}")"

# Default lint targets, relative to PROJECT_ROOT, used when no path is given.
# NOTE(review): the repository's pre-commit config requires Python files to
# live under python_pkg/ - confirm these directories still exist.
PYTHON_PATHS=(
    "PYTHON"
    "articles"
    "poker-modifier-app"
    "tests"
)

# Directories that should never be linted.
EXCLUDE_PATHS=(
    ".venv"
    "__pycache__"
    ".git"
    "Bash/ffmpeg-build"
    ".pytest_cache"
    ".ruff_cache"
    ".mypy_cache"
)

# Build exclude pattern for find: one "-path '*/<dir>/*' -prune -o" clause per
# excluded directory.
# NOTE(review): nothing else in this script reads EXCLUDE_PATTERN, so the
# find-based fixers currently do not apply these exclusions.
EXCLUDE_PATTERN=""
for path in "${EXCLUDE_PATHS[@]}"; do
    EXCLUDE_PATTERN="${EXCLUDE_PATTERN} -path '*/${path}/*' -prune -o"
done
|
||||||
|
|
||||||
|
# Command-line state: three boolean switches plus a space-separated list of
# lint targets collected from every non-option argument.
FIX_MODE=false
QUICK_MODE=false
REPORT_MODE=false
TARGET_FILES=""

while (( $# > 0 )); do
    case "$1" in
        -f|--fix)    FIX_MODE=true ;;
        -q|--quick)  QUICK_MODE=true ;;
        -r|--report) REPORT_MODE=true ;;
        -h|--help)
            printf '%s\n' \
                "Usage: $0 [OPTIONS] [FILES...]" \
                "" \
                "Options:" \
                " --fix, -f Auto-fix issues where possible" \
                " --quick, -q Quick mode (ruff + mypy only)" \
                " --report, -r Generate detailed reports to ./lint-reports/" \
                " --help, -h Show this help message" \
                "" \
                "Examples:" \
                " $0 # Lint all Python files" \
                " $0 --fix # Lint and auto-fix" \
                " $0 PYTHON/ # Lint specific directory" \
                " $0 --quick --fix # Quick lint with auto-fix"
            exit 0
            ;;
        *)
            # Anything that is not a known switch is treated as a lint target.
            TARGET_FILES+=" $1"
            ;;
    esac
    shift
done

# Fall back to the default paths when no target was named.
[[ -n "${TARGET_FILES}" ]] || TARGET_FILES="${PYTHON_PATHS[*]}"

# The reports directory is only needed in report mode.
[[ "${REPORT_MODE}" != true ]] || mkdir -p "${PROJECT_ROOT}/lint-reports"

# Overall result: exit status plus the names of the tools that failed.
OVERALL_STATUS=0
FAILED_TOOLS=()
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# Helper functions
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Print a blank line followed by a bold blue banner: rule, title ($1), rule.
print_header() {
    local rule="══════════════════════════════════════════════════════════════"

    printf '\n'
    # %b expands backslash escapes the same way `echo -e` does.
    printf '%b\n' "${BOLD}${BLUE}${rule}${NC}"
    printf '%b\n' "${BOLD}${BLUE} $1${NC}"
    printf '%b\n' "${BOLD}${BLUE}${rule}${NC}"
}
|
||||||
|
|
||||||
|
# Print a blank line followed by a cyan section banner: rule, title ($1), rule.
print_subheader() {
    local rule="──────────────────────────────────────────────────────────────"

    printf '\n'
    # %b expands backslash escapes the same way `echo -e` does.
    printf '%b\n' "${CYAN}${rule}${NC}"
    printf '%b\n' "${CYAN} $1${NC}"
    printf '%b\n' "${CYAN}${rule}${NC}"
}
|
||||||
|
|
||||||
|
# Print message $1 prefixed with a green check mark.
print_success() {
    printf '%b\n' "${GREEN}✓${NC} $1"
}
|
||||||
|
|
||||||
|
# Print message $1 prefixed with a yellow warning sign.
print_warning() {
    printf '%b\n' "${YELLOW}⚠${NC} $1"
}
|
||||||
|
|
||||||
|
# Print message $1 prefixed with a red cross.
print_error() {
    printf '%b\n' "${RED}✗${NC} $1"
}
|
||||||
|
|
||||||
|
# Print message $1 prefixed with a blue information sign.
print_info() {
    printf '%b\n' "${BLUE}ℹ${NC} $1"
}
|
||||||
|
|
||||||
|
# Run one linter and record its outcome.
#   $1 - display name; also used as the report file name in report mode
#   $2 - full command line, executed through eval
# Returns 0 when the command succeeds. Otherwise appends the name to
# FAILED_TOOLS and returns 1. In report mode the combined stdout/stderr is also
# written to ${PROJECT_ROOT}/lint-reports/<name>.txt.
run_tool() {
    local label="$1"
    local invocation="$2"
    local report_path="${PROJECT_ROOT}/lint-reports/${label}.txt"
    local failure_msg="${label} found issues"
    local rc=0

    print_subheader "Running ${label}..."

    if [[ "${REPORT_MODE}" == true ]]; then
        failure_msg+=" (see ${report_path})"
        # The pipeline reflects the linter's status (not tee's) only because
        # the script enables pipefail.
        eval "${invocation}" 2>&1 | tee "${report_path}" || rc=$?
    else
        eval "${invocation}" || rc=$?
    fi

    if (( rc == 0 )); then
        print_success "${label} passed"
        return 0
    fi

    print_error "${failure_msg}"
    FAILED_TOOLS+=("${label}")
    return 1
}
|
||||||
|
|
||||||
|
# Report whether executable $1 is available on PATH.
# Returns 0 when found; otherwise prints a "skipping" warning and returns 1.
check_tool() {
    command -v "$1" &> /dev/null && return 0

    print_warning "$1 not found, skipping..."
    return 1
}
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# Main linting process
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Announce the run configuration, then work from the project root so the
# relative target paths resolve.
print_header "Python Linting Suite - Aggressive Mode"
echo
print_info "Target: ${TARGET_FILES}"
print_info "Fix mode: ${FIX_MODE}"
print_info "Quick mode: ${QUICK_MODE}"
print_info "Report mode: ${REPORT_MODE}"

cd "${PROJECT_ROOT}"

# ==============================================================================
# RUFF - Primary linter and formatter
# ==============================================================================
# Fix mode rewrites files in place; otherwise both passes are check-only.
if check_tool ruff; then
    case "${FIX_MODE}" in
        true)
            run_tool "ruff-lint" "ruff check --fix --show-fixes ${TARGET_FILES}" || OVERALL_STATUS=1
            run_tool "ruff-format" "ruff format ${TARGET_FILES}" || OVERALL_STATUS=1
            ;;
        *)
            run_tool "ruff-lint" "ruff check ${TARGET_FILES}" || OVERALL_STATUS=1
            run_tool "ruff-format-check" "ruff format --check ${TARGET_FILES}" || OVERALL_STATUS=1
            ;;
    esac
fi

# ==============================================================================
# MYPY - Static type checking
# ==============================================================================
# A missing tool makes the && list return non-zero without tripping errexit,
# because the failing command is not the last one in the list.
check_tool mypy && {
    run_tool "mypy" "mypy --strict --ignore-missing-imports ${TARGET_FILES}" || OVERALL_STATUS=1
}

# Quick mode stops after ruff and mypy and reports only those results.
if [[ "${QUICK_MODE}" == true ]]; then
    print_header "Quick Lint Complete"
    if (( ${#FAILED_TOOLS[@]} == 0 )); then
        print_success "All quick checks passed!"
        exit 0
    fi
    print_error "Failed tools: ${FAILED_TOOLS[*]}"
    exit 1
fi
|
||||||
|
|
||||||
|
# ==============================================================================
# Full lint phase - every remaining tool is optional and skipped when missing
# ==============================================================================

# Run linter $1 with command line $2 when it is installed; a failure only
# marks the overall run as failed.
_lint_if_available() {
    check_tool "$1" || return 0
    run_tool "$1" "$2" || OVERALL_STATUS=1
}

# Fix mode only: announce with banner $2, then run the remaining arguments as
# a command once per *.py file found under the targets.
_fix_each_python_file() {
    local tool="$1"
    local banner="$2"
    shift 2

    [[ "${FIX_MODE}" == true ]] || return 0
    check_tool "${tool}" || return 0

    print_subheader "${banner}"
    # TARGET_FILES is deliberately unquoted: it is a space-separated path list.
    find ${TARGET_FILES} -name "*.py" -type f -exec "$@" {} \;
    print_success "${tool} completed"
}

# PYLINT - Comprehensive linting
_lint_if_available pylint "pylint --rcfile=pyproject.toml --jobs=0 --fail-under=0 ${TARGET_FILES}"

# BANDIT - Security linting
_lint_if_available bandit "bandit -c pyproject.toml -r ${TARGET_FILES} --severity-level low --confidence-level low"

# VULTURE - Dead code detection
_lint_if_available vulture "vulture --min-confidence 80 ${TARGET_FILES}"

# FLAKE8 - Traditional linter
_lint_if_available flake8 "flake8 --max-line-length=88 --extend-ignore=E203,W503 --max-complexity=10 ${TARGET_FILES}"

# PYCODESTYLE - PEP 8 style checker
_lint_if_available pycodestyle "pycodestyle --max-line-length=88 --ignore=E203,W503 ${TARGET_FILES}"

# PYDOCSTYLE - Docstring style checker
_lint_if_available pydocstyle "pydocstyle --convention=google ${TARGET_FILES}"

# RADON - Complexity metrics (informational: never affects the exit status)
if check_tool radon; then
    print_subheader "Running radon (complexity analysis)..."
    echo
    printf '%b\n' "${MAGENTA}Cyclomatic Complexity:${NC}"
    radon cc -a -s ${TARGET_FILES} || true
    echo
    printf '%b\n' "${MAGENTA}Maintainability Index:${NC}"
    radon mi -s ${TARGET_FILES} || true

    # Report mode runs both analyses a second time to capture them to files.
    if [[ "${REPORT_MODE}" == true ]]; then
        radon cc -a -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-cc.txt" 2>&1 || true
        radon mi -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-mi.txt" 2>&1 || true
    fi
fi

# INTERROGATE - Docstring coverage
_lint_if_available interrogate "interrogate -v --fail-under=0 ${TARGET_FILES}"

# PYRIGHT - Microsoft's type checker (optional, very strict)
_lint_if_available pyright "pyright ${TARGET_FILES}"

# AUTOFLAKE - Unused imports/variables (fix mode only)
_fix_each_python_file autoflake "Running autoflake (removing unused imports)..." \
    autoflake --in-place --remove-all-unused-imports --remove-unused-variables

# PYUPGRADE - Upgrade Python syntax (fix mode only)
_fix_each_python_file pyupgrade "Running pyupgrade (upgrading syntax to Python 3.10+)..." \
    pyupgrade --py310-plus

# CODESPELL - Spell checking (-w writes corrections back in fix mode)
case "${FIX_MODE}" in
    true)
        _lint_if_available codespell "codespell -w --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}"
        ;;
    *)
        _lint_if_available codespell "codespell --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}"
        ;;
esac
|
||||||
|
|
||||||
|
# ==============================================================================
# Summary - report the outcome and exit with the accumulated status
# ==============================================================================
print_header "Linting Summary"
echo

# Success path first: nothing failed, so there is nothing to list.
if (( OVERALL_STATUS == 0 )); then
    print_success "All linting checks passed!"
    exit 0
fi

print_error "The following tools reported issues:"
for failed in "${FAILED_TOOLS[@]}"; do
    printf '%s\n' " - ${failed}"
done
echo

# Point at the saved reports only when report mode produced them.
[[ "${REPORT_MODE}" != true ]] || print_info "Detailed reports saved to: ${PROJECT_ROOT}/lint-reports/"
print_info "Run with --fix to auto-fix issues where possible"
exit "${OVERALL_STATUS}"
|
||||||
308
meta/pyproject.toml
Normal file
308
meta/pyproject.toml
Normal file
@ -0,0 +1,308 @@
|
|||||||
|
[project]
|
||||||
|
name = "testsandmisc"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Collection of miscellaneous tests and scripts"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# RUFF - Extremely fast Python linter and formatter (written in Rust)
|
||||||
|
# ============================================================================
|
||||||
|
[tool.ruff]
|
||||||
|
target-version = "py310"
|
||||||
|
# Include all Python files
|
||||||
|
include = ["*.py", "**/*.py"]
|
||||||
|
# Exclude vendored/build directories
|
||||||
|
exclude = [
|
||||||
|
".git",
|
||||||
|
".venv",
|
||||||
|
"__pycache__",
|
||||||
|
"build",
|
||||||
|
"dist",
|
||||||
|
".eggs",
|
||||||
|
"Bash/ffmpeg-build", # Vendored FFmpeg tools
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
# AGGRESSIVE: Select ALL rules from all categories
|
||||||
|
select = ["ALL"]
|
||||||
|
# Ignores for rules that are too strict for this mixed script repository
|
||||||
|
ignore = [
|
||||||
|
# D203 vs D211 conflict - we use D211 (no blank line before class docstring)
|
||||||
|
"D203", # 1 blank line required before class docstring (conflicts with D211)
|
||||||
|
# D212 vs D213 conflict - we use D212 (summary on first line after """)
|
||||||
|
"D213", # Multi-line docstring summary should start at second line (conflicts with D212)
|
||||||
|
# Formatter conflicts - recommended to disable when using ruff format
|
||||||
|
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
|
||||||
|
"COM812", # Trailing comma missing - formatter handles this automatically
|
||||||
|
"ISC001", # Implicit string concatenation - formatter may create these when wrapping
|
||||||
|
# Security audit - prone to false positives with validated input
|
||||||
|
# https://github.com/astral-sh/ruff/issues/4045
|
||||||
|
"S603", # subprocess call without shell - prone to false positives as it is
|
||||||
|
# difficult to determine whether the passed arguments have been validated
|
||||||
|
]
|
||||||
|
|
||||||
|
# Allow ALL rules to be auto-fixed
|
||||||
|
fixable = ["ALL"]
|
||||||
|
unfixable = []
|
||||||
|
|
||||||
|
# Per-file ignores — only rules that FUNDAMENTALLY conflict with test code remain.
|
||||||
|
# Every other rule was fixed in source. See justifications below.
|
||||||
|
[tool.ruff.lint.per-file-ignores]
|
||||||
|
"**/tests/**/*.py" = [
|
||||||
|
"ARG", # @patch decorators inject mock params that aren't always referenced;
|
||||||
|
# the patch side-effect is needed, not the mock object itself.
|
||||||
|
"D", # Test names like test_sub_cards_no_answer_text are self-documenting;
|
||||||
|
# docstrings would be redundant noise on every test method.
|
||||||
|
"PLC0415", # Test isolation requires importing AFTER mocking sys.modules;
|
||||||
|
# top-level imports would bypass the mocks entirely.
|
||||||
|
"PLR2004", # assert count == 5 is clearer than assert count == EXPECTED_COUNT;
|
||||||
|
# named constants for test expectations add indirection without value.
|
||||||
|
"S101", # assert IS what tests do — every Python test suite suppresses this.
|
||||||
|
"SLF001", # Unit tests must exercise private internals (_method, _attr) to reach
|
||||||
|
# 100% branch coverage; only integration tests can avoid this.
|
||||||
|
]
|
||||||
|
"**/test_*.py" = [
|
||||||
|
"ARG",
|
||||||
|
"D",
|
||||||
|
"PLC0415",
|
||||||
|
"PLR2004",
|
||||||
|
"S101",
|
||||||
|
"SLF001",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
[tool.ruff.lint.pydocstyle]
|
||||||
|
convention = "google" # Use Google docstring convention
|
||||||
|
|
||||||
|
[tool.ruff.lint.isort]
|
||||||
|
force-single-line = false
|
||||||
|
force-sort-within-sections = true
|
||||||
|
known-first-party = ["python_pkg"]
|
||||||
|
|
||||||
|
[tool.ruff.lint.flake8-quotes]
|
||||||
|
docstring-quotes = "double"
|
||||||
|
inline-quotes = "double"
|
||||||
|
|
||||||
|
[tool.ruff.lint.flake8-tidy-imports]
|
||||||
|
ban-relative-imports = "all"
|
||||||
|
|
||||||
|
[tool.ruff.format]
|
||||||
|
quote-style = "double"
|
||||||
|
indent-style = "space"
|
||||||
|
skip-magic-trailing-comma = false
|
||||||
|
line-ending = "auto"
|
||||||
|
docstring-code-format = true
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# MYPY - Static type checker (most aggressive settings)
|
||||||
|
# ============================================================================
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.10"
|
||||||
|
# Strict mode enables most checks
|
||||||
|
strict = true
|
||||||
|
# Additional aggressive settings
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_configs = true
|
||||||
|
disallow_untyped_defs = true
|
||||||
|
disallow_incomplete_defs = true
|
||||||
|
check_untyped_defs = true
|
||||||
|
disallow_untyped_decorators = true
|
||||||
|
no_implicit_optional = true
|
||||||
|
warn_redundant_casts = true
|
||||||
|
warn_unused_ignores = true
|
||||||
|
warn_no_return = true
|
||||||
|
warn_unreachable = true
|
||||||
|
# Extra strict settings
|
||||||
|
disallow_any_unimported = true
|
||||||
|
disallow_any_explicit = false # Too aggressive for practical use
|
||||||
|
disallow_any_generics = true
|
||||||
|
disallow_subclassing_any = true
|
||||||
|
strict_equality = true
|
||||||
|
extra_checks = true
|
||||||
|
# Allow missing imports for third-party packages
|
||||||
|
ignore_missing_imports = true
|
||||||
|
# Show error codes
|
||||||
|
show_error_codes = true
|
||||||
|
# Enable colored output
|
||||||
|
color_output = true
|
||||||
|
# Exclude vendored directories
|
||||||
|
exclude = [
|
||||||
|
"Bash/ffmpeg-build/",
|
||||||
|
".venv/",
|
||||||
|
"linux_configuration/scripts/misc/testsAndMisc-bash/tools/", # Avoid duplicate module named 'tools'
|
||||||
|
]
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PYLINT - Comprehensive Python linter
|
||||||
|
# ============================================================================
|
||||||
|
[tool.pylint.main]
|
||||||
|
# Analyse import fallback blocks
|
||||||
|
analyse-fallback-blocks = true
|
||||||
|
# Pickle collected data for later comparisons
|
||||||
|
persistent = true
|
||||||
|
# Jobs to use for parallel execution (0 = auto)
|
||||||
|
jobs = 0
|
||||||
|
# Minimum Python version
|
||||||
|
py-version = "3.10"
|
||||||
|
# Ignore vendored directories
|
||||||
|
ignore = ["Bash", ".venv", "__pycache__"]
|
||||||
|
# Ignore patterns
|
||||||
|
ignore-patterns = [".*\\.pyi$"]
|
||||||
|
# Allow C extension modules to be introspected
|
||||||
|
extension-pkg-allow-list = ["cv2", "pygame", "lxml"]
|
||||||
|
|
||||||
|
[tool.pylint.messages_control]
|
||||||
|
# Enable every available check
|
||||||
|
enable = "all"
|
||||||
|
# No disabled checks - maximum strictness
|
||||||
|
disable = []
|
||||||
|
|
||||||
|
[tool.pylint.design]
|
||||||
|
# Mixins and single-entry-point classes may have zero public methods
|
||||||
|
min-public-methods = 0
|
||||||
|
# Test modules can be large
|
||||||
|
max-module-lines = 1000
|
||||||
|
# UI/mixin classes accumulate attributes across multiple mixins
|
||||||
|
max-attributes = 10
|
||||||
|
|
||||||
|
[tool.pylint.spelling]
|
||||||
|
# No spelling dictionary to avoid false positives
|
||||||
|
spelling-dict = ""
|
||||||
|
|
||||||
|
[tool.pylint.typecheck]
|
||||||
|
# cv2 (OpenCV) dynamically loads members from C extension at runtime.
|
||||||
|
# unittest.mock.MagicMock generates assertion/introspection methods at runtime.
|
||||||
|
generated-members = [
|
||||||
|
"cv2.*",
|
||||||
|
".*\\.assert_called_once_with",
|
||||||
|
".*\\.assert_called_once",
|
||||||
|
".*\\.assert_called",
|
||||||
|
".*\\.assert_not_called",
|
||||||
|
".*\\.assert_any_call",
|
||||||
|
".*\\.call_args",
|
||||||
|
".*\\.call_args_list",
|
||||||
|
".*\\.call_count",
|
||||||
|
]
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# BANDIT - Security linter
|
||||||
|
# ============================================================================
|
||||||
|
[tool.bandit]
|
||||||
|
# Exclude test directories and vendored code
|
||||||
|
exclude_dirs = ["tests", ".venv", "Bash/ffmpeg-build"]
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# BLACK & ISORT - Removed (ruff handles formatting and import sorting)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PYTEST - Testing framework configuration
|
||||||
|
# ============================================================================
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["python_pkg"]
|
||||||
|
python_files = ["test_*.py", "*_test.py"]
|
||||||
|
python_classes = ["Test*"]
|
||||||
|
python_functions = ["test_*"]
|
||||||
|
addopts = [
|
||||||
|
"-v",
|
||||||
|
"--strict-markers",
|
||||||
|
"--strict-config",
|
||||||
|
"-ra",
|
||||||
|
"--cov=python_pkg",
|
||||||
|
"--cov-branch",
|
||||||
|
"--cov-report=term-missing",
|
||||||
|
"--cov-report=lcov",
|
||||||
|
]
|
||||||
|
filterwarnings = [
|
||||||
|
"error",
|
||||||
|
"ignore::DeprecationWarning",
|
||||||
|
"default::pytest.PytestUnraisableExceptionWarning",
|
||||||
|
]
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# COVERAGE - Code coverage configuration
|
||||||
|
# ============================================================================
|
||||||
|
[tool.coverage.run]
|
||||||
|
source = ["python_pkg"]
|
||||||
|
branch = true
|
||||||
|
omit = [
|
||||||
|
"*/__pycache__/*",
|
||||||
|
"*/tests/*",
|
||||||
|
"*/.venv/*",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.coverage.report]
|
||||||
|
# Fail under this percentage
|
||||||
|
fail_under = 100
|
||||||
|
show_missing = true
|
||||||
|
skip_covered = false
|
||||||
|
exclude_lines = [
|
||||||
|
# Standard exclusions
|
||||||
|
"pragma: no cover",
|
||||||
|
# Unreachable defensive code
|
||||||
|
"raise NotImplementedError",
|
||||||
|
"raise AssertionError",
|
||||||
|
# Type checking imports
|
||||||
|
"if TYPE_CHECKING:",
|
||||||
|
# Main script entry point
|
||||||
|
'if __name__ == "__main__":',
|
||||||
|
]
|
||||||
|
# Partial branch exclusions for unreachable branches
|
||||||
|
partial_branches = [
|
||||||
|
"pragma: no branch",
|
||||||
|
]
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# VULTURE - Dead code detection
|
||||||
|
# ============================================================================
|
||||||
|
# Note: Vulture uses command-line args, but we can document settings here
|
||||||
|
# vulture --min-confidence 80 --exclude ".venv,Bash" .
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# FLAKE8 - Python linter (via Flake8-pyproject for pyproject.toml support)
|
||||||
|
# ============================================================================
|
||||||
|
[tool.flake8]
|
||||||
|
# Maximum line length (matches ruff/black)
|
||||||
|
max-line-length = 88
|
||||||
|
# Maximum McCabe complexity (matches ruff C901 threshold)
|
||||||
|
max-complexity = 10
|
||||||
|
# Maximum cognitive complexity (flake8-cognitive-complexity)
|
||||||
|
max-cognitive-complexity = 12
|
||||||
|
# Maximum function length (flake8-functions)
|
||||||
|
max-function-length = 20
|
||||||
|
# Maximum returns/arguments per function
|
||||||
|
max-returns-amount = 6
|
||||||
|
max-arguments = 5
|
||||||
|
# Docstring convention (matches ruff)
|
||||||
|
docstring-convention = "google"
|
||||||
|
# Select all error codes
|
||||||
|
select = ["E", "F", "W", "C", "B", "B950"]
|
||||||
|
# Extend with plugin codes
|
||||||
|
extend-select = ["B", "B9", "C4", "SIM", "PT", "TC", "ANN"]
|
||||||
|
# Ignore rules that conflict with ruff-format or are duplicated
|
||||||
|
extend-ignore = [
|
||||||
|
"E501", # Line too long - B950 from bugbear is smarter (allows 10% overflow)
|
||||||
|
"W503", # Line break before binary operator - contradicts PEP 8 update
|
||||||
|
"ANN101", # Missing type annotation for self
|
||||||
|
"ANN102", # Missing type annotation for cls
|
||||||
|
]
|
||||||
|
# Exclude directories
|
||||||
|
exclude = [
|
||||||
|
".git",
|
||||||
|
".venv",
|
||||||
|
"__pycache__",
|
||||||
|
"build",
|
||||||
|
"dist",
|
||||||
|
".eggs",
|
||||||
|
"Bash/ffmpeg-build",
|
||||||
|
]
|
||||||
|
# Per-file ignores
|
||||||
|
per-file-ignores = [
|
||||||
|
"**/tests/**/*.py:S101,ANN",
|
||||||
|
"**/test_*.py:S101,ANN",
|
||||||
|
]
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PYDOCSTYLE - Docstring style checker (handled by ruff; no standalone config)
|
||||||
|
# ============================================================================
|
||||||
|
# Configured in [tool.ruff.lint.pydocstyle] above
|
||||||
79
meta/requirements.txt
Normal file
79
meta/requirements.txt
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
# ==============================================================================
|
||||||
|
# testsAndMisc — combined runtime + development dependencies
|
||||||
|
# Install with: pip install -r meta/requirements.txt
|
||||||
|
# Sorted alphabetically (enforced by pre-commit `requirements-txt-fixer`).
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
|
add-trailing-comma>=3.1.0
|
||||||
|
aiohttp>=3.9
|
||||||
|
autoflake>=2.2.0
|
||||||
|
autopep8>=2.0.0
|
||||||
|
bandit>=1.7.0
|
||||||
|
beautifulsoup4>=4.0
|
||||||
|
berserk>=0.13
|
||||||
|
black>=24.0.0
|
||||||
|
bottle>=0.12
|
||||||
|
codespell>=2.2.0
|
||||||
|
coverage>=7.4.0
|
||||||
|
darglint>=1.8.0
|
||||||
|
dead>=1.5.0
|
||||||
|
docformatter>=1.7.0
|
||||||
|
fixit>=2.1.0
|
||||||
|
flake8>=7.0.0
|
||||||
|
flake8-annotations>=3.0.0
|
||||||
|
flake8-bandit>=4.1.0
|
||||||
|
flake8-bugbear>=24.0.0
|
||||||
|
flake8-comprehensions>=3.14.0
|
||||||
|
flake8-docstrings>=1.7.0
|
||||||
|
flake8-eradicate>=1.5.0
|
||||||
|
flake8-pie>=0.16.0
|
||||||
|
flake8-print>=5.0.0
|
||||||
|
flake8-pyi>=24.0.0
|
||||||
|
flake8-pytest-style>=2.0.0
|
||||||
|
flake8-return>=1.2.0
|
||||||
|
flake8-simplify>=0.21.0
|
||||||
|
genanki>=0.13
|
||||||
|
geopandas>=1.0
|
||||||
|
howlongtobeatpy>=1.0
|
||||||
|
hypothesis>=6.98.0
|
||||||
|
importlib-metadata>=7.0.0
|
||||||
|
interrogate>=1.5.0
|
||||||
|
isort>=5.13.0
|
||||||
|
lxml>=5.0
|
||||||
|
matplotlib>=3.0
|
||||||
|
mccabe>=0.7.0
|
||||||
|
mitmproxy>=10.0
|
||||||
|
mypy>=1.8.0
|
||||||
|
numpy>=1.20
|
||||||
|
opencv-python>=4.0
|
||||||
|
pillow>=10.0
|
||||||
|
pip-audit>=2.6.0
|
||||||
|
pipdeptree>=2.14.0
|
||||||
|
pre-commit>=3.6.0
|
||||||
|
prospector>=1.10.0
|
||||||
|
pycodestyle>=2.11.0
|
||||||
|
pydocstyle>=6.3.0
|
||||||
|
pyflakes>=3.2.0
|
||||||
|
pygame>=2.0
|
||||||
|
pylama>=8.4.0
|
||||||
|
pylint>=3.0.0
|
||||||
|
pyright>=1.1.350
|
||||||
|
pytest>=8.0.0
|
||||||
|
pytest-cov>=4.1.0
|
||||||
|
pytest-randomly>=3.15.0
|
||||||
|
pytest-sugar>=1.0.0
|
||||||
|
pytest-timeout>=2.2.0
|
||||||
|
pytest-xdist>=3.5.0
|
||||||
|
python-chess>=1.999
|
||||||
|
pyupgrade>=3.15.0
|
||||||
|
radon>=6.0.0
|
||||||
|
reorder-python-imports>=3.12.0
|
||||||
|
requests>=2.0
|
||||||
|
ruff>=0.8.0
|
||||||
|
safety>=2.3.0
|
||||||
|
selenium>=4.0
|
||||||
|
types-python-dateutil>=2.8.0
|
||||||
|
types-PyYAML>=6.0.0
|
||||||
|
types-requests>=2.31.0
|
||||||
|
types-setuptools>=69.0.0
|
||||||
|
websockets>=13.0
|
||||||
148
meta/run.sh
Executable file
148
meta/run.sh
Executable file
@ -0,0 +1,148 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Easy entrypoint for system usage reports and polling script diagnostics.
|
||||||
|
# Usage:
|
||||||
|
# ./run.sh # today's report to stdout
|
||||||
|
# ./run.sh --date 20260501 # specific day
|
||||||
|
# ./run.sh --top 25 # override row count
|
||||||
|
# ./run.sh --profile [duration] # profile polling scripts (default 60s)
|
||||||
|
# ./run.sh --diagnose # find inefficient shell scripts
|
||||||
|
# ./run.sh --init-artifacts ... # bootstrap contract/evidence/session artifacts
|
||||||
|
#
|
||||||
|
# Any other args are forwarded to usage_report.py unchanged.
|
||||||
|
|
||||||
|
set -euo pipefail

# Directory the script was invoked from.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# The helper paths below are relative to the repository root. When this script
# is run from a subdirectory of the repo (for example via its real location
# rather than a root-level symlink), SCRIPT_DIR is one level too deep, so fall
# back to the parent directory. The fallback only triggers when the original
# location does not contain linux_configuration/ but its parent does.
if [[ ! -d "$SCRIPT_DIR/linux_configuration" && -d "$SCRIPT_DIR/../linux_configuration" ]]; then
    SCRIPT_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"
fi

REPORT_SCRIPT="$SCRIPT_DIR/linux_configuration/scripts/system-maintenance/bin/usage_report.py"
ARTIFACT_INIT_SCRIPT="$SCRIPT_DIR/scripts/init_agent_artifacts.sh"

# Fail early with a clear message if either helper script is missing.
if [[ ! -f "$REPORT_SCRIPT" ]]; then
    echo "Error: usage_report.py not found at: $REPORT_SCRIPT" >&2
    exit 1
fi

if [[ ! -f "$ARTIFACT_INIT_SCRIPT" ]]; then
    echo "Error: init_agent_artifacts.sh not found at: $ARTIFACT_INIT_SCRIPT" >&2
    exit 1
fi
|
||||||
|
|
||||||
|
# Profiling mode: attach strace to this shell for a fixed duration and print
# the clone/execve lines from its call-count summary.
#
# Arguments:
#   $1 - trace duration passed to timeout(1) (default: 60)
# Outputs:
#   Progress messages on stderr; up to 20 matching summary lines on stdout.
# Returns:
#   1 when strace is not installed; 0 otherwise.
profile_polling_scripts() {
    local duration="${1:-60}"

    # Without strace the trace below would fail silently and the "analysis"
    # would print nothing, so report the missing tool explicitly.
    if ! command -v strace >/dev/null 2>&1; then
        echo "Error: strace is required for --profile but was not found in PATH" >&2
        return 1
    fi

    echo "=== Polling Script Profiler (${duration}s) ===" >&2
    echo "Tracing fork/exec calls in shell scripts..." >&2
    echo "" >&2

    # mktemp creates the file exclusively under an unpredictable name; a fixed
    # /tmp/<name>_<pid> path could be pre-created or symlinked by someone else.
    local trace_file
    trace_file="$(mktemp "${TMPDIR:-/tmp}/polling_trace.XXXXXX")"

    # Attach to this shell ($$) and count clone/execve calls (-c prints a
    # summary table when strace detaches). timeout ending the trace yields a
    # non-zero status, which is expected here, hence "|| true".
    timeout "$duration" strace -f -e trace=clone,execve -c -p "$$" > "$trace_file" 2>&1 || true

    echo "Trace completed. Analyzing results:" >&2
    echo "" >&2

    if grep -q -e "execve" -e "clone" "$trace_file"; then
        # head may close the pipe early; that must not abort under pipefail.
        grep -e "execve" -e "clone" "$trace_file" | head -20 || true
    else
        # Nothing matched: show what strace actually wrote (for example an
        # attach/permission error) instead of hiding it.
        echo "No clone/execve activity recorded; raw strace output:" >&2
        cat "$trace_file" >&2
    fi

    rm -f "$trace_file"
}
|
||||||
|
|
||||||
|
# Print up to five grep matches from the *.sh files under SCRIPT_DIR.
# All arguments are handed to grep (optional flags followed by the pattern).
_scan_shell_scripts() {
    # -I skips binary files and --exclude-dir=.git skips git metadata, so real
    # matches that merely contain "Binary" or "<any char>git" are no longer
    # filtered out. "|| true": a search without matches (grep exit 1) or head
    # closing the pipe early must not abort the caller under set -e/pipefail.
    grep -rI --include="*.sh" --exclude-dir=.git "$@" "$SCRIPT_DIR" 2>/dev/null \
        | head -5 || true
}

# Diagnostic mode: find inefficient patterns in shell scripts.
#
# Greps the *.sh files under SCRIPT_DIR for five known anti-patterns and then
# prints a fixed list of recommendations.
#
# Outputs:
#   Section headings and recommendations on stderr; matches on stdout
#   (at most five per section).
diagnose_polling_scripts() {
    echo "=== Shell Script Efficiency Audit ===" >&2
    echo "" >&2

    # Check for common anti-patterns
    echo "Checking for anti-patterns in shell scripts..." >&2
    echo "" >&2

    # Pattern 1: while true with sleep (no event-driven check)
    echo "1. Polling loops (while true + sleep):" >&2
    _scan_shell_scripts "while true\|while :"
    echo "" >&2

    # Pattern 2: $(date +...) calls in loops (fork-heavy)
    echo "2. Excessive date calls (each forks a process):" >&2
    _scan_shell_scripts '\$(date'
    echo "" >&2

    # Pattern 3: pgrep/xdotool in loops
    echo "3. Process inspection in loops (pgrep, xdotool):" >&2
    _scan_shell_scripts "while.*pgrep\|while.*xdotool\|pgrep.*while"
    echo "" >&2

    # Pattern 4: pipes in hot paths. First list the FILES that contain a
    # "while true" loop (-l; NUL-separated so odd file names survive xargs),
    # then keep those that also pipe into awk/grep/tr. xargs -r skips the
    # second grep entirely when the first one found nothing.
    echo "4. Heavy pipes in polling scripts (| awk, | grep, | tr):" >&2
    grep -rlIZ --include="*.sh" --exclude-dir=.git "while true" "$SCRIPT_DIR" 2>/dev/null \
        | xargs -0 -r grep -l -e " | awk" -e " | grep" -e " | tr" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    # Pattern 5: sleep with very short intervals ("sleep 0.5", "sleep .5",
    # and a bare "sleep 0", including at end of line)
    echo "5. Aggressive polling (sleep < 1s):" >&2
    _scan_shell_scripts -E "sleep 0?\.[0-9]|sleep 0([^0-9]|$)"
    echo "" >&2

    echo "=== Recommendations ===" >&2
    echo "1. Replace 'while true + sleep' with event-driven I/O (inotifywait, read -t, etc.)" >&2
    echo "2. Use /proc and /sys instead of forking date, sensors, acpi, etc." >&2
    echo "3. Cache frequently accessed values (e.g., in /tmp state files)" >&2
    echo "4. Use bash builtins: printf %()T instead of date, \${var//} instead of tr, etc." >&2
    echo "5. Use i3blocks interval=persist + event loop instead of polling mode" >&2
    echo "6. Increase polling intervals: 1s → 5s → 10s where acceptable" >&2
}
|
||||||
|
|
||||||
|
# Handle special modes. Only the first argument selects a mode; anything that
# does not match falls through to the default report invocation below.
case "${1:-}" in
    --profile)
        # Optional second argument is the trace duration.
        profile_polling_scripts "${2:-60}"
        exit 0
        ;;
    --diagnose)
        diagnose_polling_scripts
        exit 0
        ;;
    --init-artifacts)
        # Drop the mode flag and hand the remaining arguments to the helper.
        shift
        exec "$ARTIFACT_INIT_SCRIPT" "$@"
        ;;
    --help)
        # Print only the header comment block of this file: skip the shebang,
        # strip the leading "# " from each comment line, and stop at the first
        # non-comment line so internal comments are not shown as help text.
        awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "${BASH_SOURCE[0]}"
        exit 0
        ;;
esac

# Default: run usage_report.py with all remaining args
exec python3 "$REPORT_SCRIPT" "$@"
|
||||||
308
pyproject.toml
308
pyproject.toml
@ -1,308 +0,0 @@
|
|||||||
[project]
|
|
||||||
name = "testsandmisc"
|
|
||||||
version = "0.1.0"
|
|
||||||
description = "Collection of miscellaneous tests and scripts"
|
|
||||||
requires-python = ">=3.10"
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# RUFF - Extremely fast Python linter and formatter (written in Rust)
|
|
||||||
# ============================================================================
|
|
||||||
[tool.ruff]
|
|
||||||
target-version = "py310"
|
|
||||||
# Include all Python files
|
|
||||||
include = ["*.py", "**/*.py"]
|
|
||||||
# Exclude vendored/build directories
|
|
||||||
exclude = [
|
|
||||||
".git",
|
|
||||||
".venv",
|
|
||||||
"__pycache__",
|
|
||||||
"build",
|
|
||||||
"dist",
|
|
||||||
".eggs",
|
|
||||||
"Bash/ffmpeg-build", # Vendored FFmpeg tools
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.ruff.lint]
|
|
||||||
# AGGRESSIVE: Select ALL rules from all categories
|
|
||||||
select = ["ALL"]
|
|
||||||
# Ignores for rules that are too strict for this mixed script repository
|
|
||||||
ignore = [
|
|
||||||
# D203 vs D211 conflict - we use D211 (no blank line before class docstring)
|
|
||||||
"D203", # 1 blank line required before class docstring (conflicts with D211)
|
|
||||||
# D212 vs D213 conflict - we use D212 (summary on first line after """)
|
|
||||||
"D213", # Multi-line docstring summary should start at second line (conflicts with D212)
|
|
||||||
# Formatter conflicts - recommended to disable when using ruff format
|
|
||||||
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
|
|
||||||
"COM812", # Trailing comma missing - formatter handles this automatically
|
|
||||||
"ISC001", # Implicit string concatenation - formatter may create these when wrapping
|
|
||||||
# Security audit - prone to false positives with validated input
|
|
||||||
# https://github.com/astral-sh/ruff/issues/4045
|
|
||||||
"S603", # subprocess call without shell - prone to false positives as it is
|
|
||||||
# difficult to determine whether the passed arguments have been validated
|
|
||||||
]
|
|
||||||
|
|
||||||
# Allow ALL rules to be auto-fixed
|
|
||||||
fixable = ["ALL"]
|
|
||||||
unfixable = []
|
|
||||||
|
|
||||||
# Per-file ignores — only rules that FUNDAMENTALLY conflict with test code remain.
|
|
||||||
# Every other rule was fixed in source. See justifications below.
|
|
||||||
[tool.ruff.lint.per-file-ignores]
|
|
||||||
"**/tests/**/*.py" = [
|
|
||||||
"ARG", # @patch decorators inject mock params that aren't always referenced;
|
|
||||||
# the patch side-effect is needed, not the mock object itself.
|
|
||||||
"D", # Test names like test_sub_cards_no_answer_text are self-documenting;
|
|
||||||
# docstrings would be redundant noise on every test method.
|
|
||||||
"PLC0415", # Test isolation requires importing AFTER mocking sys.modules;
|
|
||||||
# top-level imports would bypass the mocks entirely.
|
|
||||||
"PLR2004", # assert count == 5 is clearer than assert count == EXPECTED_COUNT;
|
|
||||||
# named constants for test expectations add indirection without value.
|
|
||||||
"S101", # assert IS what tests do — every Python test suite suppresses this.
|
|
||||||
"SLF001", # Unit tests must exercise private internals (_method, _attr) to reach
|
|
||||||
# 100% branch coverage; only integration tests can avoid this.
|
|
||||||
]
|
|
||||||
"**/test_*.py" = [
|
|
||||||
"ARG",
|
|
||||||
"D",
|
|
||||||
"PLC0415",
|
|
||||||
"PLR2004",
|
|
||||||
"S101",
|
|
||||||
"SLF001",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
[tool.ruff.lint.pydocstyle]
|
|
||||||
convention = "google" # Use Google docstring convention
|
|
||||||
|
|
||||||
[tool.ruff.lint.isort]
|
|
||||||
force-single-line = false
|
|
||||||
force-sort-within-sections = true
|
|
||||||
known-first-party = ["python_pkg"]
|
|
||||||
|
|
||||||
[tool.ruff.lint.flake8-quotes]
|
|
||||||
docstring-quotes = "double"
|
|
||||||
inline-quotes = "double"
|
|
||||||
|
|
||||||
[tool.ruff.lint.flake8-tidy-imports]
|
|
||||||
ban-relative-imports = "all"
|
|
||||||
|
|
||||||
[tool.ruff.format]
|
|
||||||
quote-style = "double"
|
|
||||||
indent-style = "space"
|
|
||||||
skip-magic-trailing-comma = false
|
|
||||||
line-ending = "auto"
|
|
||||||
docstring-code-format = true
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# MYPY - Static type checker (most aggressive settings)
|
|
||||||
# ============================================================================
|
|
||||||
[tool.mypy]
|
|
||||||
python_version = "3.10"
|
|
||||||
# Strict mode enables most checks
|
|
||||||
strict = true
|
|
||||||
# Additional aggressive settings
|
|
||||||
warn_return_any = true
|
|
||||||
warn_unused_configs = true
|
|
||||||
disallow_untyped_defs = true
|
|
||||||
disallow_incomplete_defs = true
|
|
||||||
check_untyped_defs = true
|
|
||||||
disallow_untyped_decorators = true
|
|
||||||
no_implicit_optional = true
|
|
||||||
warn_redundant_casts = true
|
|
||||||
warn_unused_ignores = true
|
|
||||||
warn_no_return = true
|
|
||||||
warn_unreachable = true
|
|
||||||
# Extra strict settings
|
|
||||||
disallow_any_unimported = true
|
|
||||||
disallow_any_explicit = false # Too aggressive for practical use
|
|
||||||
disallow_any_generics = true
|
|
||||||
disallow_subclassing_any = true
|
|
||||||
strict_equality = true
|
|
||||||
extra_checks = true
|
|
||||||
# Allow missing imports for third-party packages
|
|
||||||
ignore_missing_imports = true
|
|
||||||
# Show error codes
|
|
||||||
show_error_codes = true
|
|
||||||
# Enable colored output
|
|
||||||
color_output = true
|
|
||||||
# Exclude vendored directories
|
|
||||||
exclude = [
|
|
||||||
"Bash/ffmpeg-build/",
|
|
||||||
".venv/",
|
|
||||||
"linux_configuration/scripts/misc/testsAndMisc-bash/tools/", # Avoid duplicate module named 'tools'
|
|
||||||
]
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PYLINT - Comprehensive Python linter
|
|
||||||
# ============================================================================
|
|
||||||
[tool.pylint.main]
|
|
||||||
# Analyse import fallback blocks
|
|
||||||
analyse-fallback-blocks = true
|
|
||||||
# Pickle collected data for later comparisons
|
|
||||||
persistent = true
|
|
||||||
# Jobs to use for parallel execution (0 = auto)
|
|
||||||
jobs = 0
|
|
||||||
# Minimum Python version
|
|
||||||
py-version = "3.10"
|
|
||||||
# Ignore vendored directories
|
|
||||||
ignore = ["Bash", ".venv", "__pycache__"]
|
|
||||||
# Ignore patterns
|
|
||||||
ignore-patterns = [".*\\.pyi$"]
|
|
||||||
# Allow C extension modules to be introspected
|
|
||||||
extension-pkg-allow-list = ["cv2", "pygame", "lxml"]
|
|
||||||
|
|
||||||
[tool.pylint.messages_control]
|
|
||||||
# Enable all checks by disabling disable
|
|
||||||
enable = "all"
|
|
||||||
# No disabled checks - maximum strictness
|
|
||||||
disable = []
|
|
||||||
|
|
||||||
[tool.pylint.design]
|
|
||||||
# Mixins and single-entry-point classes may have zero public methods
|
|
||||||
min-public-methods = 0
|
|
||||||
# Test modules can be large
|
|
||||||
max-module-lines = 1000
|
|
||||||
# UI/mixin classes accumulate attributes across multiple mixins
|
|
||||||
max-attributes = 10
|
|
||||||
|
|
||||||
[tool.pylint.spelling]
|
|
||||||
# No spelling dictionary to avoid false positives
|
|
||||||
spelling-dict = ""
|
|
||||||
|
|
||||||
[tool.pylint.typecheck]
|
|
||||||
# cv2 (OpenCV) dynamically loads members from C extension at runtime.
|
|
||||||
# unittest.mock.MagicMock generates assertion/introspection methods at runtime.
|
|
||||||
generated-members = [
|
|
||||||
"cv2.*",
|
|
||||||
".*\\.assert_called_once_with",
|
|
||||||
".*\\.assert_called_once",
|
|
||||||
".*\\.assert_called",
|
|
||||||
".*\\.assert_not_called",
|
|
||||||
".*\\.assert_any_call",
|
|
||||||
".*\\.call_args",
|
|
||||||
".*\\.call_args_list",
|
|
||||||
".*\\.call_count",
|
|
||||||
]
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# BANDIT - Security linter
|
|
||||||
# ============================================================================
|
|
||||||
[tool.bandit]
|
|
||||||
# Exclude test directories and vendored code
|
|
||||||
exclude_dirs = ["tests", ".venv", "Bash/ffmpeg-build"]
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# BLACK & ISORT - Removed (ruff handles formatting and import sorting)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PYTEST - Testing framework configuration
|
|
||||||
# ============================================================================
|
|
||||||
[tool.pytest.ini_options]
|
|
||||||
testpaths = ["python_pkg"]
|
|
||||||
python_files = ["test_*.py", "*_test.py"]
|
|
||||||
python_classes = ["Test*"]
|
|
||||||
python_functions = ["test_*"]
|
|
||||||
addopts = [
|
|
||||||
"-v",
|
|
||||||
"--strict-markers",
|
|
||||||
"--strict-config",
|
|
||||||
"-ra",
|
|
||||||
"--cov=python_pkg",
|
|
||||||
"--cov-branch",
|
|
||||||
"--cov-report=term-missing",
|
|
||||||
"--cov-report=lcov",
|
|
||||||
]
|
|
||||||
filterwarnings = [
|
|
||||||
"error",
|
|
||||||
"ignore::DeprecationWarning",
|
|
||||||
"default::pytest.PytestUnraisableExceptionWarning",
|
|
||||||
]
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# COVERAGE - Code coverage configuration
|
|
||||||
# ============================================================================
|
|
||||||
[tool.coverage.run]
|
|
||||||
source = ["python_pkg"]
|
|
||||||
branch = true
|
|
||||||
omit = [
|
|
||||||
"*/__pycache__/*",
|
|
||||||
"*/tests/*",
|
|
||||||
"*/.venv/*",
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.coverage.report]
|
|
||||||
# Fail under this percentage
|
|
||||||
fail_under = 100
|
|
||||||
show_missing = true
|
|
||||||
skip_covered = false
|
|
||||||
exclude_lines = [
|
|
||||||
# Standard exclusions
|
|
||||||
"pragma: no cover",
|
|
||||||
# Unreachable defensive code
|
|
||||||
"raise NotImplementedError",
|
|
||||||
"raise AssertionError",
|
|
||||||
# Type checking imports
|
|
||||||
"if TYPE_CHECKING:",
|
|
||||||
# Main script entry point
|
|
||||||
'if __name__ == "__main__":',
|
|
||||||
]
|
|
||||||
# Partial branch exclusions for unreachable branches
|
|
||||||
partial_branches = [
|
|
||||||
"pragma: no branch",
|
|
||||||
]
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# VULTURE - Dead code detection
|
|
||||||
# ============================================================================
|
|
||||||
# Note: Vulture uses command-line args, but we can document settings here
|
|
||||||
# vulture --min-confidence 80 --exclude ".venv,Bash" .
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# FLAKE8 - Python linter (via Flake8-pyproject for pyproject.toml support)
|
|
||||||
# ============================================================================
|
|
||||||
[tool.flake8]
|
|
||||||
# Maximum line length (matches ruff/black)
|
|
||||||
max-line-length = 88
|
|
||||||
# Maximum McCabe complexity (matches ruff C901 threshold)
|
|
||||||
max-complexity = 10
|
|
||||||
# Maximum cognitive complexity (flake8-cognitive-complexity)
|
|
||||||
max-cognitive-complexity = 12
|
|
||||||
# Maximum function length (flake8-functions)
|
|
||||||
max-function-length = 20
|
|
||||||
# Maximum returns/arguments per function
|
|
||||||
max-returns-amount = 6
|
|
||||||
max-arguments = 5
|
|
||||||
# Docstring convention (matches ruff)
|
|
||||||
docstring-convention = "google"
|
|
||||||
# Select all error codes
|
|
||||||
select = ["E", "F", "W", "C", "B", "B950"]
|
|
||||||
# Extend with plugin codes
|
|
||||||
extend-select = ["B", "B9", "C4", "SIM", "PT", "TC", "ANN"]
|
|
||||||
# Ignore rules that conflict with ruff-format or are duplicated
|
|
||||||
extend-ignore = [
|
|
||||||
"E501", # Line too long - B950 from bugbear is smarter (allows 10% overflow)
|
|
||||||
"W503", # Line break before binary operator - contradicts PEP 8 update
|
|
||||||
"ANN101", # Missing type annotation for self
|
|
||||||
"ANN102", # Missing type annotation for cls
|
|
||||||
]
|
|
||||||
# Exclude directories
|
|
||||||
exclude = [
|
|
||||||
".git",
|
|
||||||
".venv",
|
|
||||||
"__pycache__",
|
|
||||||
"build",
|
|
||||||
"dist",
|
|
||||||
".eggs",
|
|
||||||
"Bash/ffmpeg-build",
|
|
||||||
]
|
|
||||||
# Per-file ignores
|
|
||||||
per-file-ignores = [
|
|
||||||
"**/tests/**/*.py:S101,ANN",
|
|
||||||
"**/test_*.py:S101,ANN",
|
|
||||||
]
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PYDOCSTYLE - Docstring style checker (ruff handles this, but for standalone)
|
|
||||||
# ============================================================================
|
|
||||||
# Configured in ruff.lint.pydocstyle above
|
|
||||||
1
pyproject.toml
Symbolic link
1
pyproject.toml
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
meta/pyproject.toml
|
||||||
@ -1,206 +0,0 @@
|
|||||||
"""Shared geographic data module for Warsaw and Poland Anki generators.
|
|
||||||
|
|
||||||
This module handles downloading and caching geographic data from various sources:
|
|
||||||
- OpenStreetMap via Overpass API
|
|
||||||
- Geofabrik OSM extracts
|
|
||||||
- GitHub repositories with pre-processed GeoJSON
|
|
||||||
|
|
||||||
All data is cached locally to avoid repeated downloads.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import (
|
|
||||||
CACHE_DIR,
|
|
||||||
MAX_RETRIES,
|
|
||||||
MIN_LAKE_AREA_KM2,
|
|
||||||
MIN_LINE_COORDS,
|
|
||||||
MIN_PEAK_ELEVATION,
|
|
||||||
MIN_RING_COORDS,
|
|
||||||
MIN_RIVER_LENGTH_KM,
|
|
||||||
OVERPASS_ENDPOINTS,
|
|
||||||
POLSKA_GEOJSON_BASE,
|
|
||||||
REQUEST_TIMEOUT,
|
|
||||||
RETRY_DELAY,
|
|
||||||
WIKIDATA_SPARQL,
|
|
||||||
)
|
|
||||||
from python_pkg.geo_data._poland_admin import (
|
|
||||||
get_poland_boundary,
|
|
||||||
get_polish_gminy,
|
|
||||||
get_polish_powiaty,
|
|
||||||
get_polish_wojewodztwa,
|
|
||||||
)
|
|
||||||
from python_pkg.geo_data._poland_nature import (
|
|
||||||
get_polish_forests,
|
|
||||||
get_polish_landscape_parks,
|
|
||||||
get_polish_mountain_peaks,
|
|
||||||
get_polish_mountain_ranges,
|
|
||||||
get_polish_national_parks,
|
|
||||||
get_polish_nature_reserves,
|
|
||||||
)
|
|
||||||
from python_pkg.geo_data._poland_water import (
|
|
||||||
get_polish_coastal_features,
|
|
||||||
get_polish_islands,
|
|
||||||
get_polish_lakes,
|
|
||||||
get_polish_rivers,
|
|
||||||
get_polish_unesco_sites,
|
|
||||||
)
|
|
||||||
from python_pkg.geo_data._warsaw import (
|
|
||||||
get_vistula_river,
|
|
||||||
get_warsaw_boundary,
|
|
||||||
get_warsaw_bridges,
|
|
||||||
get_warsaw_districts,
|
|
||||||
get_warsaw_metro_stations,
|
|
||||||
get_warsaw_osiedla,
|
|
||||||
)
|
|
||||||
from python_pkg.geo_data._warsaw_places import get_warsaw_landmarks, get_warsaw_streets
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"CACHE_DIR",
|
|
||||||
"MAX_RETRIES",
|
|
||||||
"MIN_LAKE_AREA_KM2",
|
|
||||||
"MIN_LINE_COORDS",
|
|
||||||
"MIN_PEAK_ELEVATION",
|
|
||||||
"MIN_RING_COORDS",
|
|
||||||
"MIN_RIVER_LENGTH_KM",
|
|
||||||
"OVERPASS_ENDPOINTS",
|
|
||||||
"POLSKA_GEOJSON_BASE",
|
|
||||||
"REQUEST_TIMEOUT",
|
|
||||||
"RETRY_DELAY",
|
|
||||||
"WIKIDATA_SPARQL",
|
|
||||||
"clear_cache",
|
|
||||||
"download_all_poland_data",
|
|
||||||
"download_all_warsaw_data",
|
|
||||||
"get_poland_boundary",
|
|
||||||
"get_polish_coastal_features",
|
|
||||||
"get_polish_forests",
|
|
||||||
"get_polish_gminy",
|
|
||||||
"get_polish_islands",
|
|
||||||
"get_polish_lakes",
|
|
||||||
"get_polish_landscape_parks",
|
|
||||||
"get_polish_mountain_peaks",
|
|
||||||
"get_polish_mountain_ranges",
|
|
||||||
"get_polish_national_parks",
|
|
||||||
"get_polish_nature_reserves",
|
|
||||||
"get_polish_powiaty",
|
|
||||||
"get_polish_rivers",
|
|
||||||
"get_polish_unesco_sites",
|
|
||||||
"get_polish_wojewodztwa",
|
|
||||||
"get_vistula_river",
|
|
||||||
"get_warsaw_boundary",
|
|
||||||
"get_warsaw_bridges",
|
|
||||||
"get_warsaw_districts",
|
|
||||||
"get_warsaw_landmarks",
|
|
||||||
"get_warsaw_metro_stations",
|
|
||||||
"get_warsaw_osiedla",
|
|
||||||
"get_warsaw_streets",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def download_all_warsaw_data() -> None:
    """Pre-populate the cache with the Warsaw datasets.

    Calls the Warsaw getters one after another (boundary, Vistula river,
    bridges, metro stations, streets, landmarks, osiedla) and writes a
    progress line to stdout before each call. Return values are discarded.

    NOTE(review): ``get_warsaw_districts`` is exported by this module but is
    not part of this sequence -- confirm whether that is intentional.
    """
    rule = "=" * 60
    # Each entry pairs the exact progress banner with the getter it announces.
    steps = (
        ("\n1. Warsaw boundary...\n", get_warsaw_boundary),
        ("\n2. Vistula river...\n", get_vistula_river),
        ("\n3. Warsaw bridges...\n", get_warsaw_bridges),
        ("\n4. Metro stations...\n", get_warsaw_metro_stations),
        ("\n5. Major streets...\n", get_warsaw_streets),
        ("\n6. Landmarks...\n", get_warsaw_landmarks),
        ("\n7. Osiedla...\n", get_warsaw_osiedla),
    )

    sys.stdout.write("Downloading all Warsaw geographic data...\n")
    sys.stdout.write(rule + "\n")

    for banner, fetch in steps:
        sys.stdout.write(banner)
        fetch()

    sys.stdout.write("\n" + rule + "\n")
    sys.stdout.write("All Warsaw data cached successfully!\n")
|
|
||||||
|
|
||||||
|
|
||||||
def download_all_poland_data() -> None:
    """Pre-populate the cache with the Polish administrative datasets.

    Calls, in order, the getters for województwa, powiaty, gminy and the
    national boundary, writing a progress line to stdout before each call.
    Return values are discarded.

    NOTE(review): the nature and water getters exported by this module are
    not called here -- confirm whether "all" is meant to include them.
    """
    rule = "=" * 60
    # Each entry pairs the exact progress banner with the getter it announces.
    steps = (
        ("\n1. Województwa...\n", get_polish_wojewodztwa),
        ("\n2. Powiaty...\n", get_polish_powiaty),
        ("\n3. Gminy (this may take a while)...\n", get_polish_gminy),
        ("\n4. Poland boundary...\n", get_poland_boundary),
    )

    sys.stdout.write("Downloading all Poland geographic data...\n")
    sys.stdout.write(rule + "\n")

    for banner, fetch in steps:
        sys.stdout.write(banner)
        fetch()

    sys.stdout.write("\n" + rule + "\n")
    sys.stdout.write("All Poland data cached successfully!\n")
|
|
||||||
|
|
||||||
|
|
||||||
def clear_cache() -> None:
    """Remove the cache directory tree and report the outcome on stdout."""
    if not CACHE_DIR.exists():
        sys.stdout.write("Cache directory does not exist.\n")
        return

    shutil.rmtree(CACHE_DIR)
    sys.stdout.write("Cache cleared.\n")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import argparse

    # Command-line front end for the cache helpers defined above.
    parser = argparse.ArgumentParser(description="Manage geographic data cache")
    parser.add_argument(
        "--download-warsaw",
        action="store_true",
        help="Download all Warsaw data",
    )
    parser.add_argument(
        "--download-poland",
        action="store_true",
        help="Download all Poland data",
    )
    parser.add_argument(
        "--download-all",
        action="store_true",
        help="Download all data",
    )
    parser.add_argument(
        "--clear-cache",
        action="store_true",
        help="Clear cached data",
    )

    args = parser.parse_args()

    # Flags are handled independently so that combinations such as
    # `--clear-cache --download-all` (wipe, then refill) do what they say
    # instead of silently dropping every flag after the first one.
    want_warsaw = args.download_warsaw or args.download_all
    want_poland = args.download_poland or args.download_all

    if not (args.clear_cache or want_warsaw or want_poland):
        parser.print_help()

    # Clearing runs first so a combined invocation ends with a fresh cache.
    if args.clear_cache:
        clear_cache()
    if want_warsaw:
        download_all_warsaw_data()
    if want_poland:
        download_all_poland_data()
|
|
||||||
@ -1,317 +0,0 @@
|
|||||||
"""Common utilities for geographic data operations.
|
|
||||||
|
|
||||||
Shared constants, API helpers, and geometry extraction functions used
|
|
||||||
across the geo_data package.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
import requests
|
|
||||||
from shapely.geometry import (
|
|
||||||
GeometryCollection,
|
|
||||||
MultiPolygon,
|
|
||||||
Polygon,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
# Parent directory of the geo_data package (i.e. python_pkg/)
|
|
||||||
_PKG_DIR = Path(__file__).resolve().parent.parent
|
|
||||||
|
|
||||||
# Shared cache directory for all geo data
|
|
||||||
CACHE_DIR = _PKG_DIR / "geo_cache"
|
|
||||||
|
|
||||||
# Overpass API endpoints (multiple for redundancy)
|
|
||||||
# Note: kumi.systems is more reliable, so it's first
|
|
||||||
OVERPASS_ENDPOINTS = [
|
|
||||||
"https://overpass.kumi.systems/api/interpreter",
|
|
||||||
"https://overpass-api.de/api/interpreter",
|
|
||||||
"https://maps.mail.ru/osm/tools/overpass/api/interpreter",
|
|
||||||
]
|
|
||||||
|
|
||||||
# GitHub URLs for pre-processed data
|
|
||||||
POLSKA_GEOJSON_BASE = "https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master"
|
|
||||||
|
|
||||||
# Wikidata SPARQL endpoint
|
|
||||||
WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
|
|
||||||
|
|
||||||
# Request timeout and retry settings
|
|
||||||
REQUEST_TIMEOUT = 180
|
|
||||||
MAX_RETRIES = 3
|
|
||||||
RETRY_DELAY = 5
|
|
||||||
|
|
||||||
# Data thresholds for filtering
|
|
||||||
MIN_PEAK_ELEVATION = 300 # meters
|
|
||||||
MIN_LAKE_AREA_KM2 = 0.5 # km²
|
|
||||||
MIN_RIVER_LENGTH_KM = 10 # km
|
|
||||||
MIN_LINE_COORDS = 2 # minimum coordinates for a line
|
|
||||||
MIN_RING_COORDS = 4 # minimum coordinates for a polygon ring
|
|
||||||
|
|
||||||
|
|
||||||
def _ensure_cache_dir() -> None:
    """Make sure ``CACHE_DIR`` exists, creating missing parents as needed."""
    CACHE_DIR.mkdir(exist_ok=True, parents=True)
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_polygonal_geometry(
    geom: Polygon | MultiPolygon | GeometryCollection,
) -> Polygon | MultiPolygon | None:
    """Keep only the polygonal content of a geometry.

    Polygons and MultiPolygons pass through unchanged. For a
    GeometryCollection the direct Polygon/MultiPolygon members are kept and
    everything else is discarded; nested collections are not descended into.

    Args:
        geom: Geometry to filter.

    Returns:
        The input itself when it is already polygonal; the sole polygonal
        member when a collection has exactly one; a MultiPolygon built from
        all member polygons when it has several; otherwise None.
    """
    polygonal = (Polygon, MultiPolygon)

    if isinstance(geom, polygonal):
        return geom
    if not isinstance(geom, GeometryCollection):
        return None

    parts = [part for part in geom.geoms if isinstance(part, polygonal)]
    if len(parts) <= 1:
        return parts[0] if parts else None

    # MultiPolygon members are unpacked so the result is one flat list.
    flattened = []
    for part in parts:
        flattened.extend([part] if isinstance(part, Polygon) else part.geoms)
    return MultiPolygon(flattened)
|
|
||||||
|
|
||||||
|
|
||||||
def _try_single_request(
    endpoint: str, query: str
) -> tuple[dict[str, Any] | None, Exception | None]:
    """POST one query to one endpoint and report success or failure.

    Args:
        endpoint: Overpass API endpoint URL.
        query: Overpass QL query string, sent as the ``data`` form field.

    Returns:
        ``(payload, None)`` when the response is a JSON object containing an
        ``"elements"`` key, otherwise ``(None, error)``.
    """
    try:
        sys.stdout.write(f"  Querying {endpoint}...\n")
        reply = requests.post(
            endpoint,
            data={"data": query},
            timeout=REQUEST_TIMEOUT,
        )
        reply.raise_for_status()
        payload = reply.json()
    except (requests.RequestException, requests.Timeout, ValueError) as exc:
        return None, exc

    # A usable answer is a JSON object that carries an "elements" key.
    if isinstance(payload, dict) and "elements" in payload:
        return payload, None
    return None, ValueError("Invalid response format")
|
|
||||||
|
|
||||||
|
|
||||||
def _overpass_query(query: str) -> dict[str, Any]:
    """Run an Overpass query, retrying and falling back across endpoints.

    Every endpoint in ``OVERPASS_ENDPOINTS`` is tried up to ``MAX_RETRIES``
    times, pausing ``RETRY_DELAY`` seconds between attempts on the same
    endpoint (no pause after that endpoint's last attempt).

    Args:
        query: Overpass QL query string.

    Returns:
        The first successful JSON response.

    Raises:
        RuntimeError: If no attempt on any endpoint succeeds.
    """
    failure: Exception | None = None

    for url in OVERPASS_ENDPOINTS:
        for attempt_no in range(1, MAX_RETRIES + 1):
            payload, problem = _try_single_request(url, query)
            if payload is not None:
                return payload

            failure = problem
            sys.stdout.write(f"  Attempt {attempt_no} failed: {problem}\n")
            if attempt_no < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

    msg = f"All Overpass API endpoints failed. Last error: {failure}"
    raise RuntimeError(msg)
|
|
||||||
|
|
||||||
|
|
||||||
def _download_github_geojson(url: str, cache_path: Path) -> gpd.GeoDataFrame:
    """Download a GeoJSON FeatureCollection and cache it on disk.

    If ``cache_path`` already exists it is read and returned without any
    network access. Otherwise the document is downloaded, validated and only
    then written to ``cache_path``, so a failed or malformed response never
    ends up in the cache.

    Args:
        url: HTTP(S) URL to download from.
        cache_path: File used to cache the downloaded document.

    Returns:
        GeoDataFrame built from the document's features (EPSG:4326 when
        freshly downloaded).

    Raises:
        ValueError: If ``url`` is not http/https, or the response is not a
            JSON object with a ``features`` list.
        requests.HTTPError: If the server answers with an error status.
    """
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write(f"Downloading from {url}...\n")
    if not url.startswith(("http://", "https://")):
        msg = f"Unsupported URL scheme: {url}"
        raise ValueError(msg)

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Without this an error page with a JSON body would be cached as data.
    response.raise_for_status()
    data = response.json()

    features = data.get("features") if isinstance(data, dict) else None
    if not isinstance(features, list):
        msg = f"Response from {url} is not a GeoJSON FeatureCollection"
        raise ValueError(msg)

    _ensure_cache_dir()
    # Explicit encoding keeps the cache independent of the platform default.
    cache_path.write_text(json.dumps(data), encoding="utf-8")

    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_osiedla_rings(
|
|
||||||
element: dict[str, Any], min_coords: int
|
|
||||||
) -> tuple[list[list[tuple[float, float]]], list[list[tuple[float, float]]]]:
|
|
||||||
"""Extract outer and inner rings from an OSM relation.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
element: OSM relation element.
|
|
||||||
min_coords: Minimum number of coordinates for a valid ring.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (outer_rings, inner_rings).
|
|
||||||
"""
|
|
||||||
outer_rings: list[list[tuple[float, float]]] = []
|
|
||||||
inner_rings: list[list[tuple[float, float]]] = []
|
|
||||||
|
|
||||||
for member in element.get("members", []):
|
|
||||||
if "geometry" not in member:
|
|
||||||
continue
|
|
||||||
ring = [(p["lon"], p["lat"]) for p in member["geometry"]]
|
|
||||||
if len(ring) < min_coords:
|
|
||||||
continue
|
|
||||||
# Close the ring if not closed
|
|
||||||
if ring[0] != ring[-1]:
|
|
||||||
ring.append(ring[0])
|
|
||||||
if member.get("role") == "outer":
|
|
||||||
outer_rings.append(ring)
|
|
||||||
elif member.get("role") == "inner":
|
|
||||||
inner_rings.append(ring)
|
|
||||||
|
|
||||||
return outer_rings, inner_rings
|
|
||||||
|
|
||||||
|
|
||||||
def _build_osiedla_geometry(
|
|
||||||
outer_rings: list[list[tuple[float, float]]],
|
|
||||||
inner_rings: list[list[tuple[float, float]]],
|
|
||||||
) -> dict[str, Any]:
|
|
||||||
"""Build GeoJSON geometry from outer and inner rings.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
outer_rings: List of outer ring coordinates.
|
|
||||||
inner_rings: List of inner ring coordinates.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
GeoJSON geometry dict.
|
|
||||||
"""
|
|
||||||
if len(outer_rings) == 1:
|
|
||||||
return {
|
|
||||||
"type": "Polygon",
|
|
||||||
"coordinates": [outer_rings[0], *inner_rings],
|
|
||||||
}
|
|
||||||
# Multiple outer rings - create MultiPolygon
|
|
||||||
# Each polygon in a MultiPolygon is [exterior, hole1, hole2, ...]
|
|
||||||
return {
|
|
||||||
"type": "MultiPolygon",
|
|
||||||
"coordinates": [[ring] for ring in outer_rings],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_polygon_from_element(
|
|
||||||
element: dict[str, Any],
|
|
||||||
) -> dict[str, Any] | None:
|
|
||||||
"""Extract polygon geometry from an OSM relation or way element.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
element: OSM element (relation or way).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
GeoJSON geometry dict, or None if extraction fails.
|
|
||||||
"""
|
|
||||||
if element.get("type") == "relation":
|
|
||||||
outer_rings, inner_rings = _extract_osiedla_rings(element, MIN_RING_COORDS)
|
|
||||||
if not outer_rings:
|
|
||||||
return None
|
|
||||||
return _build_osiedla_geometry(outer_rings, inner_rings)
|
|
||||||
|
|
||||||
if element.get("type") == "way" and "geometry" in element:
|
|
||||||
coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
|
|
||||||
if len(coords) < MIN_RING_COORDS:
|
|
||||||
return None
|
|
||||||
if coords[0] != coords[-1]:
|
|
||||||
coords.append(coords[0])
|
|
||||||
return {"type": "Polygon", "coordinates": [coords]}
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_line_from_way(element: dict[str, Any]) -> dict[str, Any] | None:
|
|
||||||
"""Extract line geometry from an OSM way element.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
element: OSM way element.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
GeoJSON LineString geometry dict, or None if extraction fails.
|
|
||||||
"""
|
|
||||||
if element.get("type") != "way" or "geometry" not in element:
|
|
||||||
return None
|
|
||||||
|
|
||||||
coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
|
|
||||||
if len(coords) < MIN_LINE_COORDS:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return {"type": "LineString", "coordinates": coords}
|
|
||||||
|
|
||||||
|
|
||||||
def _add_area_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
|
|
||||||
"""Add area_km2 column to a GeoDataFrame.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
gdf: GeoDataFrame with polygon geometries.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
GeoDataFrame with area_km2 column added.
|
|
||||||
"""
|
|
||||||
if len(gdf) == 0:
|
|
||||||
return gdf
|
|
||||||
gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system
|
|
||||||
gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
|
|
||||||
return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def _add_length_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
|
|
||||||
"""Add length_km column to a GeoDataFrame.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
gdf: GeoDataFrame with line geometries.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
GeoDataFrame with length_km column added.
|
|
||||||
"""
|
|
||||||
if len(gdf) == 0:
|
|
||||||
return gdf
|
|
||||||
gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system
|
|
||||||
gdf["length_km"] = gdf_proj.geometry.length / 1000
|
|
||||||
return gdf
|
|
||||||
@ -1,225 +0,0 @@
|
|||||||
"""Polish administrative boundary data.
|
|
||||||
|
|
||||||
Functions for downloading and caching Polish administrative divisions:
|
|
||||||
województwa, powiaty, gminy, and the national boundary.
|
|
||||||
Includes Wikidata integration for population data.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import contextlib
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import (
|
|
||||||
CACHE_DIR,
|
|
||||||
POLSKA_GEOJSON_BASE,
|
|
||||||
WIKIDATA_SPARQL,
|
|
||||||
_add_area_column,
|
|
||||||
_build_osiedla_geometry,
|
|
||||||
_download_github_geojson,
|
|
||||||
_ensure_cache_dir,
|
|
||||||
_extract_osiedla_rings,
|
|
||||||
_overpass_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def _query_wikidata(query: str) -> list[dict[str, Any]]:
    """Query Wikidata SPARQL endpoint.

    Args:
        query: SPARQL query string.

    Returns:
        List of result bindings (``results.bindings`` of the JSON response).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    response = requests.get(
        WIKIDATA_SPARQL,
        params={"query": query, "format": "json"},
        # Wikimedia's User-Agent policy asks clients to identify themselves;
        # generic library agents may be throttled or rejected.
        headers={
            "User-Agent": (
                "testsAndMisc-geo_data/1.0 "
                "(https://github.com/kuhyx/testsAndMisc)"
            ),
            "Accept": "application/sparql-results+json",
        },
        timeout=60,
    )
    response.raise_for_status()
    return response.json()["results"]["bindings"]
|
|
||||||
|
|
||||||
|
|
||||||
def _get_powiaty_population() -> dict[str, int]:
    """Get population data for all Polish powiaty from Wikidata.

    The result is cached as JSON in ``CACHE_DIR / "powiaty_population.json"``
    and read back from there on later calls. The cache is always read and
    written as UTF-8 because powiat names contain non-ASCII characters and
    are stored unescaped.

    Returns:
        Dictionary mapping powiat name (with any ``"powiat "`` text removed)
        to population.
    """
    cache_path = CACHE_DIR / "powiaty_population.json"

    if cache_path.exists():
        return json.loads(cache_path.read_text(encoding="utf-8"))

    # Query Wikidata for all powiaty (Q247073) in Poland (Q36) with population
    # Filter to only current Polish powiaty using country=Poland filter
    query = """
    SELECT ?powiat ?powiatLabel ?population WHERE {
    ?powiat wdt:P31 wd:Q247073.
    ?powiat wdt:P17 wd:Q36.
    ?powiat wdt:P1082 ?population.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "pl,en". }
    }
    ORDER BY DESC(?population)
    """

    sys.stdout.write("Fetching powiaty population data from Wikidata...\n")
    results = _query_wikidata(query)

    population_map: dict[str, int] = {}
    for item in results:
        label = item.get("powiatLabel", {}).get("value", "")
        pop = item.get("population", {}).get("value", "0")
        if label and pop:
            # Remove "powiat" prefix if present for matching
            clean_label = label.replace("powiat ", "").strip()
            # Values that are not plain integers are skipped.
            # NOTE(review): if a powiat appears in several rows the last one
            # wins -- confirm that is the intended value.
            with contextlib.suppress(ValueError):
                population_map[clean_label] = int(pop)

    _ensure_cache_dir()
    cache_path.write_text(
        json.dumps(population_map, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    sys.stdout.write(f"Cached population data for {len(population_map)} powiaty.\n")
    return population_map
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_wojewodztwa() -> gpd.GeoDataFrame:
    """Return the boundaries of the Polish województwa (voivodeships).

    The data comes from the polska-geojson file under
    ``POLSKA_GEOJSON_BASE`` and is cached as ``polish_wojewodztwa.geojson``.

    Returns:
        GeoDataFrame with województwa boundaries.
    """
    return _download_github_geojson(
        f"{POLSKA_GEOJSON_BASE}/wojewodztwa/wojewodztwa-min.geojson",
        CACHE_DIR / "polish_wojewodztwa.geojson",
    )
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_powiaty() -> gpd.GeoDataFrame:
    """Return Polish powiaty (counties) ordered from most to least populous.

    Boundaries come from the polska-geojson file under
    ``POLSKA_GEOJSON_BASE``; populations come from
    ``_get_powiaty_population`` and are matched on the ``nazwa`` column.

    Returns:
        GeoDataFrame with powiat boundaries and a ``population`` column
        (0 where no population could be matched).
    """
    gdf = _download_github_geojson(
        f"{POLSKA_GEOJSON_BASE}/powiaty/powiaty-min.geojson",
        CACHE_DIR / "polish_powiaty.geojson",
    )

    population_map = _get_powiaty_population()

    # Case-insensitive fallback index; setdefault keeps the first entry for
    # each lowercase name, matching a front-to-back scan of the mapping.
    by_lowercase: dict[str, int] = {}
    for known_name, count in population_map.items():
        by_lowercase.setdefault(known_name.lower(), count)

    def get_population(nazwa: str) -> int:
        """Look up a powiat's population: exact name first, then lowercase."""
        if not nazwa:
            return 0
        # Remove "powiat " prefix for matching
        key = nazwa.replace("powiat ", "").strip()
        if key in population_map:
            return population_map[key]
        return by_lowercase.get(key.lower(), 0)

    gdf["population"] = gdf["nazwa"].apply(get_population)

    ranked = gdf.sort_values("population", ascending=False)
    return ranked.reset_index(drop=True)
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_gminy() -> gpd.GeoDataFrame:
    """Get Polish gminy (municipalities) from OSM, sorted by area descending.

    The first call queries Overpass and caches the result as
    ``polish_gminy.geojson``. The cached file has no area column, so the
    area is computed after loading on every call; cached and fresh calls
    therefore return the same columns in the same order.

    Returns:
        GeoDataFrame with gminy boundaries and an ``area_km2`` column. An
        empty frame is returned as-is, without the column.
    """
    cache_path = CACHE_DIR / "polish_gminy.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        features = _fetch_polish_gminy_features()

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson), encoding="utf-8")
        sys.stdout.write(f"Cached {len(features)} gminy.\n")

        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    if "area_km2" not in gdf.columns:
        gdf = _add_area_column(gdf)
    # _add_area_column leaves an empty frame untouched; nothing to sort then.
    if "area_km2" not in gdf.columns:
        return gdf

    return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)


def _fetch_polish_gminy_features() -> list[dict[str, Any]]:
    """Query Overpass for gminy and convert them to GeoJSON features."""
    sys.stdout.write("Fetching gminy data from OSM (this may take a while)...\n")
    # Polish gminy are admin_level=7 in OSM
    query = """
    [out:json][timeout:300];
    area["ISO3166-1"="PL"]->.pl;
    relation["boundary"="administrative"]["admin_level"="7"]["name"](area.pl);
    out geom;
    """

    data = _overpass_query(query)

    features: list[dict[str, Any]] = []
    seen_names: set[str] = set()
    min_ring_coords = 4

    for element in data.get("elements", []):
        if element.get("type") != "relation":
            continue

        # NOTE(review): only the first relation with a given name is kept;
        # confirm that distinct gminy never share a name.
        name = element.get("tags", {}).get("name", "")
        if not name or name in seen_names:
            continue

        outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
        if not outer_rings:
            continue

        seen_names.add(name)
        features.append(
            {
                "type": "Feature",
                "properties": {"name": name},
                "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
            }
        )

    return features
|
|
||||||
|
|
||||||
|
|
||||||
def get_poland_boundary() -> gpd.GeoDataFrame:
    """Return the outline of Poland as a one-row GeoDataFrame.

    On a cache miss the outline is produced by merging all województwa
    (after a zero-width buffer to repair invalid geometries) and saved as
    ``poland_boundary.geojson``.

    Returns:
        GeoDataFrame with Poland boundary.
    """
    cache_path = CACHE_DIR / "poland_boundary.geojson"

    if not cache_path.exists():
        regions = get_polish_wojewodztwa()
        # buffer(0) fixes invalid geometries before they are merged.
        regions["geometry"] = regions["geometry"].buffer(0)
        outline = gpd.GeoDataFrame(geometry=[regions.union_all()], crs=regions.crs)

        _ensure_cache_dir()
        outline.to_file(cache_path, driver="GeoJSON")
        return outline

    return gpd.read_file(cache_path)
|
|
||||||
@ -1,446 +0,0 @@
|
|||||||
"""Polish natural land features.
|
|
||||||
|
|
||||||
Functions for downloading and caching data about Polish mountains,
|
|
||||||
national parks, forests, nature reserves, and landscape parks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import contextlib
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import (
|
|
||||||
CACHE_DIR,
|
|
||||||
MIN_PEAK_ELEVATION,
|
|
||||||
_add_area_column,
|
|
||||||
_build_osiedla_geometry,
|
|
||||||
_ensure_cache_dir,
|
|
||||||
_extract_osiedla_rings,
|
|
||||||
_extract_polygon_from_element,
|
|
||||||
_extract_polygonal_geometry,
|
|
||||||
_overpass_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_mountain_peaks() -> gpd.GeoDataFrame:
    """Get Polish mountain peaks, sorted by elevation descending.

    Peaks are named OSM nodes tagged ``natural=peak`` with an ``ele`` tag of
    at least ``MIN_PEAK_ELEVATION``; only the first node seen for each name
    is kept. Results are cached as ``polish_mountain_peaks.geojson``. The
    cache is written only when at least one peak was found, so an empty
    response cannot end up cached.

    Returns:
        GeoDataFrame with mountain peak points and elevation.

    Raises:
        ValueError: If the OSM response contains no usable peak.
    """
    cache_path = CACHE_DIR / "polish_mountain_peaks.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        return gdf.sort_values("elevation", ascending=False).reset_index(drop=True)

    sys.stdout.write("Fetching mountain peaks data from OSM...\n")
    query = """
    [out:json][timeout:120];
    area["ISO3166-1"="PL"]->.pl;
    (
    node["natural"="peak"]["name"]["ele"](area.pl);
    );
    out;
    """

    data = _overpass_query(query)

    features = []
    seen_names: set[str] = set()

    for element in data.get("elements", []):
        if element.get("type") != "node":
            continue

        tags = element.get("tags", {})
        name = tags.get("name", "")
        ele_str = tags.get("ele", "")

        if not name or not ele_str or name in seen_names:
            continue

        try:
            # Accept a decimal comma and ignore a trailing unit ("1234 m").
            elevation = float(ele_str.replace(",", ".").split()[0])
        except (ValueError, IndexError):
            # Non-numeric or whitespace-only elevation: skip the node.
            continue

        if elevation < MIN_PEAK_ELEVATION:
            continue

        seen_names.add(name)
        features.append(
            {
                "type": "Feature",
                "properties": {"name": name, "elevation": elevation},
                "geometry": {
                    "type": "Point",
                    "coordinates": [element["lon"], element["lat"]],
                },
            }
        )

    # Checked before caching: an empty cache file would make every later
    # call take the cached branch and never retry the download.
    if not features:
        msg = "No mountain peaks found in OSM data"
        raise ValueError(msg)

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    # Names are stored unescaped, so the encoding must not depend on the
    # platform default.
    cache_path.write_text(json.dumps(geojson, ensure_ascii=False), encoding="utf-8")

    sys.stdout.write(f"Cached {len(features)} mountain peaks.\n")

    gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
    return gdf.sort_values("elevation", ascending=False).reset_index(drop=True)
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_mountain_ranges() -> gpd.GeoDataFrame:
    """Get Polish mountain ranges, sorted by area descending.

    The first call queries Overpass and caches the raw features as
    ``polish_mountain_ranges.geojson``. Geometry repair and the area
    computation run after loading on every call, so cached and fresh calls
    return the same columns in the same order.

    Returns:
        GeoDataFrame with mountain range polygons and an ``area_km2`` column.
    """
    cache_path = CACHE_DIR / "polish_mountain_ranges.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching mountain ranges data from OSM...\n")
        query = """
        [out:json][timeout:180];
        area["ISO3166-1"="PL"]->.pl;
        (
        relation["natural"="mountain_range"]["name"](area.pl);
        way["natural"="mountain_range"]["name"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        features: list[dict[str, Any]] = []
        seen_names: set[str] = set()

        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue

            # Shared helper covers both relations and ways and returns None
            # for anything that cannot form a polygon.
            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue

            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # Names are stored unescaped, so the encoding must not depend on the
        # platform default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} mountain ranges.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Fix invalid geometries from OSM data and extract only polygons
    gdf["geometry"] = gdf.geometry.make_valid()
    gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
    # copy() so the column assignment below acts on an independent frame.
    gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty].copy()

    # Calculate area in km²
    gdf_proj = gdf.to_crs("EPSG:2180")  # Polish coordinate system
    gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000

    return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_national_parks() -> gpd.GeoDataFrame:
    """Get Polish national parks, sorted by area descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written. Only
    relations whose name contains "narodowy" (case-insensitive) are kept, and
    each name is kept once.

    Returns:
        GeoDataFrame with national park polygons and an ``area_km2`` column.
        An empty frame is returned as-is, without that column.
    """
    cache_path = CACHE_DIR / "polish_national_parks.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "area_km2" in gdf.columns:
            return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
        # The cache written below stores only names and geometries, so the
        # area still has to be derived before the frame can be sorted.
    else:
        sys.stdout.write("Fetching national parks data from OSM...\n")
        query = """
        [out:json][timeout:180];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["boundary"="national_park"]["name"](area.pl);
          relation["leisure"="nature_reserve"]["name"]["protect_class"="2"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        features = []
        seen_names: set[str] = set()
        min_ring_coords = 4

        for element in data.get("elements", []):
            if element.get("type") != "relation":
                continue

            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue

            # Keep only names mentioning "narodowy"; the lower-cased check
            # also covers the capitalised "Narodowy".
            if "narodowy" not in name.lower():
                continue

            outer_rings, inner_rings = _extract_osiedla_rings(
                element, min_ring_coords
            )
            if not outer_rings:
                continue

            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name},
                    "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
                }
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} national parks.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Nothing to project or sort when no park was found.
    if len(gdf) == 0:
        return gdf

    # Calculate area in km² from the EPSG:2180 projection of the geometries.
    gdf_proj = gdf.to_crs("EPSG:2180")
    gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000

    return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_forests() -> gpd.GeoDataFrame:
    """Get major Polish forests (puszcze), sorted by area descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written. Only
    elements whose name contains one of "Puszcza", "Bory", "Las " or "Lasy "
    are kept, and each name is kept once.

    Returns:
        GeoDataFrame with forest polygons.
    """
    cache_path = CACHE_DIR / "polish_forests.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "area_km2" in gdf.columns:
            return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
        # The cache written below stores only names and geometries, so fall
        # through to the same area/sort step used for freshly fetched data.
    else:
        sys.stdout.write("Fetching forests data from OSM...\n")
        # Query for named forests, especially "Puszcza" type
        query = """
        [out:json][timeout:300];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["natural"="wood"]["name"](area.pl);
          relation["landuse"="forest"]["name"~"Puszcza|Bory|Las"](area.pl);
          way["natural"="wood"]["name"~"Puszcza|Bory"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)
        forest_keywords = ("Puszcza", "Bory", "Las ", "Lasy ")

        features = []
        seen_names: set[str] = set()

        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            if not any(keyword in name for keyword in forest_keywords):
                continue

            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue

            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} forests.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    gdf = _add_area_column(gdf)

    if len(gdf) > 0:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_nature_reserves() -> gpd.GeoDataFrame:
    """Get Polish nature reserves, sorted by area descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written. Each
    reserve name is kept once.

    Returns:
        GeoDataFrame with nature reserve polygons.
    """
    cache_path = CACHE_DIR / "polish_nature_reserves.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "area_km2" in gdf.columns:
            return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
        # The cache written below stores only names and geometries, so fall
        # through to the same area/sort step used for freshly fetched data.
    else:
        sys.stdout.write(
            "Fetching nature reserves data from OSM (this may take a while)...\n"
        )
        query = """
        [out:json][timeout:600];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["leisure"="nature_reserve"]["name"](area.pl);
          way["leisure"="nature_reserve"]["name"](area.pl);
          relation["boundary"="protected_area"]["protect_class"="4"]["name"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        features = []
        seen_names: set[str] = set()

        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue

            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue

            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} nature reserves.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    gdf = _add_area_column(gdf)

    if len(gdf) > 0:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_landscape_parks() -> gpd.GeoDataFrame:
    """Get Polish landscape parks, sorted by area descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written. In both
    cases geometries are repaired, reduced to their polygonal part, and rows
    left without geometry are dropped before the area is computed.

    Returns:
        GeoDataFrame with landscape park polygons and an ``area_km2`` column.
        An empty frame is returned as-is, without that column.
    """
    cache_path = CACHE_DIR / "polish_landscape_parks.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching landscape parks data from OSM...\n")
        query = """
        [out:json][timeout:300];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["boundary"="protected_area"]["protect_class"="5"]["name"](area.pl);
          relation["leisure"="nature_reserve"]["name"~"Park Krajobrazowy"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        features = []
        seen_names: set[str] = set()
        min_ring_coords = 4

        for element in data.get("elements", []):
            if element.get("type") != "relation":
                continue

            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue

            outer_rings, inner_rings = _extract_osiedla_rings(
                element, min_ring_coords
            )
            if not outer_rings:
                continue

            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name},
                    "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
                }
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} landscape parks.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Nothing to repair, project or sort when no park was found.
    if len(gdf) == 0:
        return gdf

    # Fix invalid geometries from OSM data and extract only polygons
    gdf["geometry"] = gdf.geometry.make_valid()
    gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
    # Remove any rows where geometry extraction failed; copy so the column
    # assignment below works on an independent frame.
    gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty].copy()

    if len(gdf) == 0:
        return gdf

    # The cache file stores only names and geometries, so the area is derived
    # here for cached and freshly fetched data alike.
    if "area_km2" not in gdf.columns:
        gdf_proj = gdf.to_crs("EPSG:2180")
        gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000

    return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
|
|
||||||
@ -1,437 +0,0 @@
|
|||||||
"""Polish water features and cultural sites.
|
|
||||||
|
|
||||||
Functions for downloading and caching data about Polish lakes, rivers,
|
|
||||||
islands, coastal features, and UNESCO World Heritage sites.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import (
|
|
||||||
CACHE_DIR,
|
|
||||||
MIN_LAKE_AREA_KM2,
|
|
||||||
MIN_LINE_COORDS,
|
|
||||||
MIN_RING_COORDS,
|
|
||||||
MIN_RIVER_LENGTH_KM,
|
|
||||||
_add_area_column,
|
|
||||||
_add_length_column,
|
|
||||||
_build_osiedla_geometry,
|
|
||||||
_ensure_cache_dir,
|
|
||||||
_extract_osiedla_rings,
|
|
||||||
_extract_polygon_from_element,
|
|
||||||
_overpass_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_coastal_geometry(
    element: dict[str, Any],
    natural_type: str,
    line_types: tuple[str, ...],
) -> dict[str, Any] | None:
    """Build a GeoJSON geometry for one coastal OSM element.

    Relations are delegated to ``_extract_polygon_from_element``. Ways whose
    ``natural_type`` is listed in ``line_types`` become a LineString; any
    other way becomes a Polygon, closed by repeating its first point when
    needed.

    Args:
        element: OSM element.
        natural_type: The natural= tag value.
        line_types: Tuple of natural types that should be lines.

    Returns:
        GeoJSON geometry dict, or None if extraction fails.
    """
    kind = element.get("type")
    if kind == "relation":
        return _extract_polygon_from_element(element)

    if not (kind == "way" and "geometry" in element):
        return None

    points = [(node["lon"], node["lat"]) for node in element["geometry"]]
    if len(points) < MIN_LINE_COORDS:
        return None

    # Line-like features are returned exactly as drawn.
    if natural_type in line_types:
        return {"type": "LineString", "coordinates": points}

    # Everything else needs enough points to form a ring.
    if len(points) < MIN_RING_COORDS:
        return None

    ring = points if points[0] == points[-1] else [*points, points[0]]
    return {"type": "Polygon", "coordinates": [ring]}
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_river_coords_from_element(
    element: dict[str, Any],
) -> list[list[tuple[float, float]]]:
    """Collect the line segments of a river element as (lon, lat) lists.

    A way contributes its own geometry; a relation contributes the geometry
    of every member way. Segments shorter than ``MIN_LINE_COORDS`` points are
    dropped.

    Args:
        element: OSM element (way or relation).

    Returns:
        List of coordinate lists (line segments).
    """
    kind = element.get("type")

    # First gather the raw point lists, then convert and filter them once.
    if kind == "way" and "geometry" in element:
        raw_lines = [element["geometry"]]
    elif kind == "relation":
        raw_lines = [
            member["geometry"]
            for member in element.get("members", [])
            if member.get("type") == "way" and "geometry" in member
        ]
    else:
        raw_lines = []

    segments: list[list[tuple[float, float]]] = []
    for raw_line in raw_lines:
        segment = [(node["lon"], node["lat"]) for node in raw_line]
        if len(segment) >= MIN_LINE_COORDS:
            segments.append(segment)

    return segments
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_lakes() -> gpd.GeoDataFrame:
    """Get Polish lakes, sorted by area descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written. Each
    lake name is kept once, and lakes not larger than ``MIN_LAKE_AREA_KM2``
    are filtered out after the area is computed.

    Returns:
        GeoDataFrame with lake polygons.
    """
    cache_path = CACHE_DIR / "polish_lakes.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "area_km2" in gdf.columns:
            return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
        # The cache written below stores only names and geometries, so fall
        # through to the same area/filter/sort step used for fresh data.
    else:
        sys.stdout.write("Fetching lakes data from OSM...\n")
        query = """
        [out:json][timeout:300];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["natural"="water"]["water"="lake"]["name"](area.pl);
          way["natural"="water"]["water"="lake"]["name"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        features = []
        seen_names: set[str] = set()

        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue

            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue

            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} lakes.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    gdf = _add_area_column(gdf)

    if len(gdf) > 0:
        # Filter to lakes > MIN_LAKE_AREA_KM2 to exclude tiny ponds
        gdf = gdf[gdf["area_km2"] > MIN_LAKE_AREA_KM2]
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)

    return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_rivers() -> gpd.GeoDataFrame:
    """Get Polish rivers, sorted by length descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written.

    Elements are grouped by name plus wikidata ID, so rivers that share a
    name but carry different wikidata IDs stay separate. An element without
    a wikidata tag is keyed by its own OSM type and ID and therefore forms
    its own feature. Rivers not longer than ``MIN_RIVER_LENGTH_KM`` are
    filtered out after the length is computed.

    Returns:
        GeoDataFrame with river linestrings.
    """
    cache_path = CACHE_DIR / "polish_rivers.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "length_km" in gdf.columns:
            return gdf.sort_values("length_km", ascending=False).reset_index(drop=True)
        # The cache written below stores only names and geometries, so fall
        # through to the same length/filter/sort step used for fresh data.
    else:
        sys.stdout.write("Fetching rivers data from OSM...\n")
        query = """
        [out:json][timeout:300];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["waterway"="river"]["name"](area.pl);
          way["waterway"="river"]["name"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        # Group segments by river name AND wikidata ID (or OSM ID for
        # uniqueness); this prevents merging different rivers with the same
        # name.
        rivers_by_key: dict[str, list[list[tuple[float, float]]]] = {}
        river_names: dict[str, str] = {}  # key -> display name

        for element in data.get("elements", []):
            tags = element.get("tags", {})
            name = tags.get("name", "")
            if not name:
                continue

            # Use wikidata ID if available, otherwise use element type+id
            wikidata = tags.get("wikidata", "")
            if wikidata:
                key = f"{name}_{wikidata}"
            else:
                key = f"{name}_{element.get('type')}_{element.get('id')}"

            coord_lists = _extract_river_coords_from_element(element)
            if coord_lists:
                rivers_by_key.setdefault(key, []).extend(coord_lists)
                river_names[key] = name

        features = []
        for key, coord_lists in rivers_by_key.items():
            geometry: dict[str, Any]
            # A single segment is a LineString; several form a MultiLineString.
            if len(coord_lists) == 1:
                geometry = {"type": "LineString", "coordinates": coord_lists[0]}
            else:
                geometry = {"type": "MultiLineString", "coordinates": coord_lists}

            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": river_names[key]},
                    "geometry": geometry,
                }
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} rivers.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    gdf = _add_length_column(gdf)

    if len(gdf) > 0:
        gdf = gdf[gdf["length_km"] > MIN_RIVER_LENGTH_KM]
        return gdf.sort_values("length_km", ascending=False).reset_index(drop=True)

    return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_islands() -> gpd.GeoDataFrame:
    """Get Polish islands, sorted by area descending.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass (elements tagged place=island or
    place=islet) and the cache file is written. Each name is kept once.

    Returns:
        GeoDataFrame with island polygons.
    """
    cache_path = CACHE_DIR / "polish_islands.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "area_km2" in gdf.columns:
            return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
        # The cache written below stores only names and geometries, so fall
        # through to the same area/sort step used for freshly fetched data.
    else:
        sys.stdout.write("Fetching islands data from OSM...\n")
        query = """
        [out:json][timeout:180];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["place"="island"]["name"](area.pl);
          way["place"="island"]["name"](area.pl);
          relation["place"="islet"]["name"](area.pl);
          way["place"="islet"]["name"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)

        features = []
        seen_names: set[str] = set()

        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue

            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue

            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} islands.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    gdf = _add_area_column(gdf)

    if len(gdf) > 0:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_coastal_features() -> gpd.GeoDataFrame:
    """Get Polish coastal features, sorted by length descending.

    Data is read from the local GeoJSON cache when it exists; otherwise named
    peninsulas, spits, cliffs, coastline relations and beaches are fetched
    from OSM through Overpass and the cache file is written. Cliffs, beaches
    and coastlines are kept as lines; other ways become polygons. Each name
    is kept once, and the natural= value is stored in the ``type`` property.

    Returns:
        GeoDataFrame with coastal feature geometries.
    """
    cache_path = CACHE_DIR / "polish_coastal_features.geojson"

    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        if "length_km" in gdf.columns:
            return gdf.sort_values("length_km", ascending=False).reset_index(drop=True)
        # The cache written below stores no length, so fall through to the
        # same length/sort step used for freshly fetched data.
    else:
        sys.stdout.write("Fetching coastal features data from OSM...\n")
        query = """
        [out:json][timeout:180];
        area["ISO3166-1"="PL"]->.pl;
        (
          relation["natural"="peninsula"]["name"](area.pl);
          way["natural"="peninsula"]["name"](area.pl);
          relation["natural"="spit"]["name"](area.pl);
          way["natural"="spit"]["name"](area.pl);
          relation["natural"="cliff"]["name"](area.pl);
          way["natural"="cliff"]["name"](area.pl);
          relation["natural"="coastline"]["name"](area.pl);
          way["natural"="beach"]["name"](area.pl);
        );
        out geom;
        """

        data = _overpass_query(query)
        line_types = ("cliff", "beach", "coastline")

        features = []
        seen_names: set[str] = set()

        for element in data.get("elements", []):
            tags = element.get("tags", {})
            name = tags.get("name", "")
            natural_type = tags.get("natural", "")
            if not name or name in seen_names:
                continue

            geometry = _extract_coastal_geometry(element, natural_type, line_types)
            if geometry is None:
                continue

            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name, "type": natural_type},
                    "geometry": geometry,
                }
            )

        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        # ensure_ascii=False keeps Polish characters raw, so the file must be
        # written as UTF-8 explicitly instead of with the locale default.
        cache_path.write_text(
            json.dumps(geojson, ensure_ascii=False), encoding="utf-8"
        )

        sys.stdout.write(f"Cached {len(features)} coastal features.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    gdf = _add_length_column(gdf)

    if len(gdf) > 0:
        return gdf.sort_values("length_km", ascending=False).reset_index(drop=True)
    return gdf
|
|
||||||
|
|
||||||
|
|
||||||
def get_polish_unesco_sites() -> gpd.GeoDataFrame:
    """Get Polish UNESCO World Heritage Sites.

    Data is read from the local GeoJSON cache when it exists; otherwise it is
    fetched from OSM through Overpass and the cache file is written. Nodes
    become points, relations are built from their rings, and ways become
    polygons closed by repeating the first point when needed. Each name is
    kept once. No sorting is applied.

    Returns:
        GeoDataFrame with UNESCO site geometries.
    """
    cache_path = CACHE_DIR / "polish_unesco_sites.geojson"

    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching UNESCO sites data from OSM...\n")
    query = """
    [out:json][timeout:180];
    area["ISO3166-1"="PL"]->.pl;
    (
      relation["heritage"="world_heritage_site"]["name"](area.pl);
      way["heritage"="world_heritage_site"]["name"](area.pl);
      node["heritage"="world_heritage_site"]["name"](area.pl);
      relation["heritage:operator"="whc"]["name"](area.pl);
      way["heritage:operator"="whc"]["name"](area.pl);
      node["heritage:operator"="whc"]["name"](area.pl);
    );
    out geom;
    """

    data = _overpass_query(query)

    features = []
    seen_names: set[str] = set()
    min_ring_coords = 4

    for element in data.get("elements", []):
        name = element.get("tags", {}).get("name", "")
        if not name or name in seen_names:
            continue

        element_type = element.get("type")
        if element_type == "node":
            geometry: dict[str, Any] = {
                "type": "Point",
                "coordinates": [element["lon"], element["lat"]],
            }
        elif element_type == "relation":
            outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
            if not outer_rings:
                continue
            geometry = _build_osiedla_geometry(outer_rings, inner_rings)
        elif element_type == "way" and "geometry" in element:
            coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
            if len(coords) < min_ring_coords:
                continue
            # Close the ring so the way forms a polygon.
            if coords[0] != coords[-1]:
                coords.append(coords[0])
            geometry = {"type": "Polygon", "coordinates": [coords]}
        else:
            continue

        seen_names.add(name)
        features.append(
            {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
        )

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    # ensure_ascii=False keeps Polish characters raw, so the file must be
    # written as UTF-8 explicitly instead of with the locale default.
    cache_path.write_text(json.dumps(geojson, ensure_ascii=False), encoding="utf-8")

    sys.stdout.write(f"Cached {len(features)} UNESCO sites.\n")
    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
||||||
@ -1,407 +0,0 @@
|
|||||||
"""Warsaw geographic data functions.
|
|
||||||
|
|
||||||
Functions for downloading and caching Warsaw-specific geographic data:
|
|
||||||
boundaries, districts, Vistula river, bridges, metro stations, and osiedla.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import LineString
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import (
|
|
||||||
_PKG_DIR,
|
|
||||||
CACHE_DIR,
|
|
||||||
_build_osiedla_geometry,
|
|
||||||
_ensure_cache_dir,
|
|
||||||
_extract_osiedla_rings,
|
|
||||||
_overpass_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_boundary() -> gpd.GeoDataFrame:
    """Return the Warsaw city boundary, caching it as GeoJSON.

    Sources are tried in this order: the cache file, the bundled districts
    GeoJSON (its "Warszawa" row, or the union of all rows when that row is
    missing), and finally an Overpass query.

    Returns:
        GeoDataFrame with Warsaw boundary polygon.
    """
    cache_path = CACHE_DIR / "warsaw_boundary.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    # Prefer the districts file shipped with the package.
    districts_path = (
        _PKG_DIR / "anki_decks" / "warsaw_districts" / "warszawa-dzielnice.geojson"
    )
    if districts_path.exists():
        all_areas = gpd.read_file(districts_path)
        boundary = all_areas[all_areas["name"] == "Warszawa"]
        if len(boundary) == 0:
            merged = all_areas.union_all()
            boundary = gpd.GeoDataFrame(geometry=[merged], crs=all_areas.crs)
        _ensure_cache_dir()
        boundary.to_file(cache_path, driver="GeoJSON")
        return boundary

    # Last resort: ask Overpass for the city relation.
    sys.stdout.write("Fetching Warsaw boundary from OpenStreetMap...\n")
    query = """
    [out:json][timeout:60];
    relation["name"="Warszawa"]["admin_level"="6"];
    out geom;
    """

    data = _overpass_query(query)

    features = []
    for element in data.get("elements", []):
        if element.get("type") != "relation":
            continue
        # NOTE(review): all outer members are concatenated into a single ring
        # in member order — confirm that this yields a valid polygon.
        ring = [
            (point["lon"], point["lat"])
            for member in element.get("members", [])
            if member.get("role") == "outer" and "geometry" in member
            for point in member["geometry"]
        ]
        if not ring:
            continue
        features.append(
            {
                "type": "Feature",
                "properties": {"name": "Warszawa"},
                "geometry": {"type": "Polygon", "coordinates": [ring]},
            }
        )

    _ensure_cache_dir()
    collection = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(collection))

    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_districts() -> gpd.GeoDataFrame:
    """Load the Warsaw districts (dzielnice) from the bundled GeoJSON.

    The row named "Warszawa" is excluded from the result.

    Returns:
        GeoDataFrame with district boundaries.

    Raises:
        FileNotFoundError: If the districts GeoJSON file does not exist.
    """
    districts_path = (
        _PKG_DIR / "anki_decks" / "warsaw_districts" / "warszawa-dzielnice.geojson"
    )
    if not districts_path.exists():
        msg = "Warsaw districts GeoJSON not found"
        raise FileNotFoundError(msg)

    all_areas = gpd.read_file(districts_path)
    is_district = all_areas["name"] != "Warszawa"
    return all_areas[is_district].copy()
|
|
||||||
|
|
||||||
|
|
||||||
def get_vistula_river() -> gpd.GeoDataFrame:
    """Return the Vistula river ways inside Warsaw, caching them as GeoJSON.

    Each way returned by Overpass with at least two points becomes one
    LineString feature named "Wisła".

    Returns:
        GeoDataFrame with river geometry.
    """
    cache_path = CACHE_DIR / "warsaw_vistula.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching Vistula river data...\n")
    query = """
    [out:json][timeout:60];
    area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
    (
      way["waterway"="river"]["name"="Wisła"](area.warsaw);
    );
    out geom;
    """

    data = _overpass_query(query)

    min_coords = 2
    features = []
    for element in data.get("elements", []):
        if element.get("type") != "way" or "geometry" not in element:
            continue
        line = [(point["lon"], point["lat"]) for point in element["geometry"]]
        if len(line) < min_coords:
            continue
        features.append(
            {
                "type": "Feature",
                "properties": {"name": "Wisła"},
                "geometry": {"type": "LineString", "coordinates": line},
            }
        )

    _ensure_cache_dir()
    collection = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(collection))

    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_bridges() -> gpd.GeoDataFrame:
    """Get Warsaw bridges over the Vistula.

    A cached GeoJSON file is returned when it exists. Otherwise named bridge
    ways are fetched through ``_overpass_query``, reduced to those that
    intersect a buffer around the Vistula, merged per bridge name, cached and
    returned.

    Returns:
        GeoDataFrame (EPSG:4326) with one row per bridge name. The geometry
        is a LineString for single-way bridges and a MultiLineString when a
        bridge is made of several ways. Empty (with ``name`` and ``geometry``
        columns) when nothing matches.
    """
    cache_path = CACHE_DIR / "warsaw_bridges.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching Warsaw bridges data...\n")

    # First get the Vistula to filter bridges
    vistula = get_vistula_river()
    vistula_union = vistula.union_all()
    vistula_buffer = vistula_union.buffer(0.002)  # ~200m buffer

    # Query for bridges with "Most" in name - smaller query
    query = """
    [out:json][timeout:90];
    area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
    way["bridge"="yes"]["name"~"^Most"](area.warsaw);
    out geom;
    """
    data = _overpass_query(query)

    min_coords = 2  # a LineString needs at least two vertices
    features = []
    for element in data.get("elements", []):
        if element.get("type") != "way" or "geometry" not in element:
            continue

        name = element.get("tags", {}).get("name", "")
        if not name:
            continue

        coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
        if len(coords) < min_coords:
            continue

        # Keep every way near the river, including further ways that share
        # an already-seen name: dropping them here would leave
        # _merge_bridge_segments with nothing to merge and truncate bridges
        # that are mapped as several ways.
        if LineString(coords).intersects(vistula_buffer):
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name, "osm_id": element.get("id")},
                    "geometry": {"type": "LineString", "coordinates": coords},
                }
            )

    # Merge segments of the same bridge
    merged_features = _merge_bridge_segments(features)

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": merged_features}
    cache_path.write_text(json.dumps(geojson))

    sys.stdout.write(f"Cached {len(merged_features)} bridges.\n")

    if not merged_features:
        # from_features() on an empty list yields no geometry column, and
        # GeoPandas refuses to attach a CRS to such a frame.
        return gpd.GeoDataFrame({"name": [], "geometry": []}, crs="EPSG:4326")
    return gpd.GeoDataFrame.from_features(merged_features, crs="EPSG:4326")
|
|
||||||
|
|
||||||
|
|
||||||
def _merge_bridge_segments(features: list[dict]) -> list[dict]:
|
|
||||||
"""Merge bridge segments with the same name.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
features: List of GeoJSON features.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of merged features.
|
|
||||||
"""
|
|
||||||
by_name: dict[str, list[list[tuple[float, float]]]] = {}
|
|
||||||
|
|
||||||
for feature in features:
|
|
||||||
name = feature["properties"]["name"]
|
|
||||||
coords = feature["geometry"]["coordinates"]
|
|
||||||
if name not in by_name:
|
|
||||||
by_name[name] = []
|
|
||||||
by_name[name].append(coords)
|
|
||||||
|
|
||||||
merged = []
|
|
||||||
for name, coord_lists in by_name.items():
|
|
||||||
if len(coord_lists) == 1:
|
|
||||||
geom = {"type": "LineString", "coordinates": coord_lists[0]}
|
|
||||||
else:
|
|
||||||
geom = {"type": "MultiLineString", "coordinates": coord_lists}
|
|
||||||
|
|
||||||
merged.append(
|
|
||||||
{"type": "Feature", "properties": {"name": name}, "geometry": geom}
|
|
||||||
)
|
|
||||||
|
|
||||||
return merged
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_metro_stations() -> gpd.GeoDataFrame:
    """Get Warsaw metro stations with line information.

    A cached GeoJSON file is returned when it exists; otherwise station nodes
    are fetched through ``_overpass_query``, labelled with their line by
    looking the station name up in the hard-coded M1/M2 name sets, cached and
    returned. Only the first node seen for a given name is kept.

    Returns:
        GeoDataFrame (EPSG:4326) with station points and a ``line`` column
        holding "M1", "M2", "M1/M2", or "?" for names found in neither set.
        Empty (with ``name``, ``line`` and ``geometry`` columns) when the
        query returns no named station.
    """
    cache_path = CACHE_DIR / "warsaw_metro.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    # Known stations for each line (as of 2024)
    m1_stations = {
        "Kabaty",
        "Natolin",
        "Imielin",
        "Stokłosy",
        "Ursynów",
        "Służew",
        "Wilanowska",
        "Wierzbno",
        "Racławicka",
        "Pole Mokotowskie",
        "Politechnika",
        "Centrum",
        "Świętokrzyska",  # Also M2
        "Ratusz-Arsenał",
        "Dworzec Gdański",
        "Plac Wilsona",
        "Marymont",
        "Słodowiec",
        "Stare Bielany",
        "Wawrzyszew",
        "Młociny",
    }
    m2_stations = {
        "Bródno",
        "Kondratowicza",
        "Zacisze",
        "Targówek Mieszkaniowy",
        "Trocka",
        "Szwedzka",
        "Dworzec Wileński",
        "Świętokrzyska",  # Also M1
        "Nowy Świat-Uniwersytet",
        "Centrum Nauki Kopernik",
        "Stadion Narodowy",
        "Rondo ONZ",
        "Rondo Daszyńskiego",
        "Płocka",
        "Młynów",
        "Księcia Janusza",
        "Ulrychów",
        "Bemowo",
    }

    sys.stdout.write("Fetching metro station data...\n")
    query = """
    [out:json][timeout:60];
    area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
    (
      node["railway"="station"]["station"="subway"](area.warsaw);
      node["railway"="station"]["network"~"Metro"](area.warsaw);
    );
    out body;
    """
    data = _overpass_query(query)

    features = []
    seen_names: set[str] = set()
    for element in data.get("elements", []):
        if element.get("type") != "node":
            continue
        name = element.get("tags", {}).get("name", "")
        if not name or name in seen_names:
            continue
        seen_names.add(name)

        # Determine line from known station lists
        in_m1 = name in m1_stations
        in_m2 = name in m2_stations
        if in_m1 and in_m2:
            line = "M1/M2"
        elif in_m1:
            line = "M1"
        elif in_m2:
            line = "M2"
        else:
            line = "?"  # Unknown station

        features.append(
            {
                "type": "Feature",
                "properties": {"name": name, "line": line},
                "geometry": {
                    "type": "Point",
                    "coordinates": [element["lon"], element["lat"]],
                },
            }
        )

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(geojson))

    sys.stdout.write(f"Cached {len(features)} metro stations.\n")

    if not features:
        # from_features() on an empty list yields no geometry column, and
        # GeoPandas refuses to attach a CRS to such a frame.
        return gpd.GeoDataFrame(
            {"name": [], "line": [], "geometry": []}, crs="EPSG:4326"
        )
    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_osiedla() -> gpd.GeoDataFrame:
    """Get Warsaw osiedla (neighborhoods).

    A cached GeoJSON file is returned when it exists; otherwise the boundary
    relations are fetched through ``_overpass_query``, converted to polygon
    features with ``_extract_osiedla_rings`` / ``_build_osiedla_geometry``,
    cached and returned. Only the first usable relation per name is kept.

    Returns:
        GeoDataFrame (EPSG:4326) with osiedla boundaries. Empty (with
        ``name`` and ``geometry`` columns) when no relation yields an outer
        ring.
    """
    cache_path = CACHE_DIR / "warsaw_osiedla.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching osiedla data...\n")
    query = """
    [out:json][timeout:180];
    area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
    relation["boundary"="administrative"]["admin_level"="11"]["name"](area.warsaw);
    out geom;
    """
    data = _overpass_query(query)

    min_ring_coords = 4  # minimum ring size handed to _extract_osiedla_rings
    features = []
    seen_names: set[str] = set()
    for element in data.get("elements", []):
        if element.get("type") != "relation":
            continue

        name = element.get("tags", {}).get("name", "")
        if not name or name in seen_names:
            continue

        outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
        if not outer_rings:
            # The name is not marked as seen, so a later relation with the
            # same name may still provide the boundary.
            continue

        seen_names.add(name)
        features.append(
            {
                "type": "Feature",
                "properties": {"name": name},
                "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
            }
        )

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(geojson))

    sys.stdout.write(f"Cached {len(features)} osiedla.\n")

    if not features:
        # from_features() on an empty list yields no geometry column, and
        # GeoPandas refuses to attach a CRS to such a frame.
        return gpd.GeoDataFrame({"name": [], "geometry": []}, crs="EPSG:4326")
    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
||||||
@ -1,189 +0,0 @@
|
|||||||
"""Warsaw streets, landmarks, and place data.
|
|
||||||
|
|
||||||
Functions for downloading and caching Warsaw streets, landmarks,
|
|
||||||
and other place-related geographic data.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import MultiLineString
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import CACHE_DIR, _ensure_cache_dir, _overpass_query
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_streets(min_length: int = 500) -> gpd.GeoDataFrame:
    """Get major Warsaw streets.

    Street segments come from the cached GeoJSON file when it exists,
    otherwise they are fetched through ``_overpass_query`` (primary,
    secondary and tertiary named highways) and cached. In both cases the
    segments are passed through ``_filter_streets_by_length``.

    Args:
        min_length: Minimum street length in meters.

    Returns:
        GeoDataFrame with street geometries, as produced by
        ``_filter_streets_by_length``.
    """
    cache_path = CACHE_DIR / "warsaw_streets.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        # Filter by length if needed
        return _filter_streets_by_length(gdf, min_length)

    sys.stdout.write("Fetching street data from OpenStreetMap...\n")
    query = """
    [out:json][timeout:120];
    area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
    (
      way["highway"="primary"]["name"](area.warsaw);
      way["highway"="secondary"]["name"](area.warsaw);
      way["highway"="tertiary"]["name"](area.warsaw);
    );
    out geom;
    """
    data = _overpass_query(query)

    min_coords = 2  # a LineString needs at least two vertices
    features = []
    for element in data.get("elements", []):
        if element.get("type") != "way" or "geometry" not in element:
            continue
        coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
        if len(coords) < min_coords:
            continue
        tags = element.get("tags", {})
        features.append(
            {
                "type": "Feature",
                "properties": {
                    "name": tags.get("name", "Unknown"),
                    "highway": tags.get("highway", ""),
                },
                "geometry": {"type": "LineString", "coordinates": coords},
            }
        )

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(geojson))

    sys.stdout.write(f"Cached {len(features)} street segments.\n")

    if features:
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
    else:
        # from_features() on an empty list yields no geometry column, and
        # GeoPandas refuses to attach a CRS to such a frame.
        gdf = gpd.GeoDataFrame(
            {"name": [], "highway": [], "geometry": []}, crs="EPSG:4326"
        )
    return _filter_streets_by_length(gdf, min_length)
|
|
||||||
|
|
||||||
|
|
||||||
def _filter_streets_by_length(
    gdf: gpd.GeoDataFrame, min_length: int
) -> gpd.GeoDataFrame:
    """Filter and merge streets by name, keeping only those above min_length.

    Segments sharing a name are combined (a single segment is kept as is,
    several become one MultiLineString). Lengths are measured after
    reprojecting from EPSG:4326 to EPSG:2180.

    Args:
        gdf: GeoDataFrame with street segments; rows whose ``name`` is empty
            or "Unknown" are ignored.
        min_length: Minimum length in meters.

    Returns:
        GeoDataFrame with ``name``, ``geometry`` and ``length_m`` columns,
        sorted by length (longest first). When no street qualifies, an empty
        GeoDataFrame without CRS is returned.
    """
    # Group by street name
    streets: dict[str, list] = {}
    for _, row in gdf.iterrows():
        name = row.get("name", "Unknown")
        if name and name != "Unknown":
            streets.setdefault(name, []).append(row.geometry)

    result_rows = []
    if streets:
        names = list(streets)
        merged_geoms = [
            parts[0] if len(parts) == 1 else MultiLineString(parts)
            for parts in streets.values()
        ]
        # Reproject every merged street in one pass instead of building and
        # transforming a one-row GeoDataFrame per street.
        lengths = (
            gpd.GeoSeries(merged_geoms, crs="EPSG:4326")
            .to_crs("EPSG:2180")  # Polish coordinate system
            .length
        )
        result_rows = [
            {"name": name, "geometry": geom, "length_m": length}
            for name, geom, length in zip(names, merged_geoms, lengths)
            if length >= min_length
        ]

    # Sort by length (longest first)
    result_rows.sort(key=lambda x: x["length_m"], reverse=True)

    return gpd.GeoDataFrame(
        result_rows,
        # A CRS cannot be attached when there is no geometry column.
        crs="EPSG:4326" if result_rows else None,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def get_warsaw_landmarks() -> gpd.GeoDataFrame:
    """Get Warsaw landmarks as points.

    A cached GeoJSON file is returned when it exists; otherwise named
    museums, attractions and monuments are fetched through
    ``_overpass_query``, cached and returned. Only the first element seen
    for a given name is kept.

    Returns:
        GeoDataFrame (EPSG:4326) with ``name``, ``type`` and point geometry;
        empty with those columns when nothing is found.
    """
    cache_path = CACHE_DIR / "warsaw_landmarks.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching landmark data...\n")
    # Simplified query - just museums and major attractions
    query = """
    [out:json][timeout:60];
    area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
    (
      node["tourism"="museum"]["name"](area.warsaw);
      node["tourism"="attraction"]["name"](area.warsaw);
      node["historic"="monument"]["name"](area.warsaw);
      way["tourism"="museum"]["name"](area.warsaw);
      way["tourism"="attraction"]["name"](area.warsaw);
    );
    out center;
    """
    response = _overpass_query(query)

    collected = []
    used_names: set[str] = set()
    for item in response.get("elements", []):
        tags = item.get("tags", {})
        label = tags.get("name", "")
        if not label or label in used_names:
            continue

        # Nodes carry their own position; other elements are usable only
        # when they come with a "center" entry.
        if item.get("type") == "node":
            position = [item["lon"], item["lat"]]
        elif "center" in item:
            position = [item["center"]["lon"], item["center"]["lat"]]
        else:
            continue

        used_names.add(label)
        kind = (
            tags.get("tourism")
            or tags.get("historic")
            or tags.get("leisure")
            or "landmark"
        )
        collected.append(
            {
                "type": "Feature",
                "properties": {"name": label, "type": kind},
                "geometry": {"type": "Point", "coordinates": position},
            }
        )

    _ensure_cache_dir()
    cache_path.write_text(
        json.dumps({"type": "FeatureCollection", "features": collected})
    )

    sys.stdout.write(f"Cached {len(collected)} landmarks.\n")

    if collected:
        return gpd.GeoDataFrame.from_features(collected, crs="EPSG:4326")
    return gpd.GeoDataFrame(
        {"name": [], "type": [], "geometry": []}, crs="EPSG:4326"
    )
|
|
||||||
@ -1,487 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data._common module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from shapely.geometry import (
|
|
||||||
GeometryCollection,
|
|
||||||
LineString,
|
|
||||||
MultiPolygon,
|
|
||||||
Point,
|
|
||||||
Polygon,
|
|
||||||
)
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import (
|
|
||||||
_build_osiedla_geometry,
|
|
||||||
_download_github_geojson,
|
|
||||||
_ensure_cache_dir,
|
|
||||||
_extract_line_from_way,
|
|
||||||
_extract_osiedla_rings,
|
|
||||||
_extract_polygon_from_element,
|
|
||||||
_extract_polygonal_geometry,
|
|
||||||
_overpass_query,
|
|
||||||
_try_single_request,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestEnsureCacheDir:
    """Tests for _ensure_cache_dir."""

    def test_creates_directory(self) -> None:
        """Path.mkdir is called once with parents=True and exist_ok=True."""
        with patch.object(Path, "mkdir") as mkdir_spy:
            _ensure_cache_dir()
        mkdir_spy.assert_called_once_with(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractPolygonalGeometry:
    """Tests for _extract_polygonal_geometry."""

    @staticmethod
    def _square(offset: int = 0) -> Polygon:
        """Build a 1x1 square whose first corner is (offset, offset)."""
        lo, hi = offset, offset + 1
        return Polygon([(lo, lo), (hi, lo), (hi, hi), (lo, hi)])

    @staticmethod
    def _diagonal() -> LineString:
        """Build a non-polygonal filler geometry."""
        return LineString([(0, 0), (1, 1)])

    def test_polygon_returned_directly(self) -> None:
        """A Polygon comes back as the very same object."""
        square = self._square()
        assert _extract_polygonal_geometry(square) is square

    def test_multipolygon_returned_directly(self) -> None:
        """A MultiPolygon comes back as the very same object."""
        multi = MultiPolygon([self._square(), self._square(2)])
        assert _extract_polygonal_geometry(multi) is multi

    def test_geometry_collection_single_polygon(self) -> None:
        """A collection with one polygon yields a geometry equal to it."""
        square = self._square()
        collection = GeometryCollection([square, self._diagonal()])
        extracted = _extract_polygonal_geometry(collection)
        assert extracted is not None
        assert extracted.equals(square)

    def test_geometry_collection_multiple_polygons(self) -> None:
        """A collection with two polygons yields a MultiPolygon."""
        collection = GeometryCollection(
            [self._square(), self._square(2), self._diagonal()]
        )
        extracted = _extract_polygonal_geometry(collection)
        assert isinstance(extracted, MultiPolygon)

    def test_geometry_collection_with_multipolygon(self) -> None:
        """A polygon plus a MultiPolygon in a collection yield a MultiPolygon."""
        nested = MultiPolygon([self._square(2), self._square(4)])
        collection = GeometryCollection([self._square(), nested])
        extracted = _extract_polygonal_geometry(collection)
        assert isinstance(extracted, MultiPolygon)

    def test_geometry_collection_no_polygons(self) -> None:
        """A collection without polygons yields None."""
        collection = GeometryCollection([self._diagonal()])
        assert _extract_polygonal_geometry(collection) is None

    def test_unsupported_geometry_type(self) -> None:
        """A Point yields None."""
        assert _extract_polygonal_geometry(Point(0, 0)) is None
|
|
||||||
|
|
||||||
|
|
||||||
class TestTrySingleRequest:
    """Tests for _try_single_request."""

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_successful_request(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A dict payload with "elements" is returned with no error."""
        mock_post.return_value.json.return_value = {"elements": []}

        payload, failure = _try_single_request("http://example.com", "query")

        assert payload == {"elements": []}
        assert failure is None

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_request_exception(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A RequestException raised by post is returned as the error."""
        import requests

        mock_post.side_effect = requests.RequestException("fail")

        payload, failure = _try_single_request("http://example.com", "query")

        assert payload is None
        assert isinstance(failure, requests.RequestException)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_invalid_response_format(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A dict payload without "elements" is reported as a ValueError."""
        mock_post.return_value.json.return_value = {"no_elements": True}

        payload, failure = _try_single_request("http://example.com", "query")

        assert payload is None
        assert isinstance(failure, ValueError)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_non_dict_response(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A list payload is reported as a ValueError."""
        mock_post.return_value.json.return_value = [1, 2, 3]

        payload, failure = _try_single_request("http://example.com", "query")

        assert payload is None
        assert isinstance(failure, ValueError)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_value_error_on_json_parse(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A ValueError raised while decoding JSON is returned as the error."""
        mock_post.return_value.json.side_effect = ValueError("bad json")

        payload, failure = _try_single_request("http://example.com", "query")

        assert payload is None
        assert isinstance(failure, ValueError)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_timeout_error(self, mock_stdout: MagicMock, mock_post: MagicMock) -> None:
        """A Timeout raised by post is returned as the error."""
        import requests

        mock_post.side_effect = requests.Timeout("timeout")

        payload, failure = _try_single_request("http://example.com", "query")

        assert payload is None
        assert isinstance(failure, requests.Timeout)
|
|
||||||
|
|
||||||
|
|
||||||
class TestOverpassQuery:
    """Tests for _overpass_query."""

    @patch("python_pkg.geo_data._common._try_single_request")
    def test_success_on_first_try(self, mock_req: MagicMock) -> None:
        """The payload of a successful first request is returned."""
        mock_req.return_value = ({"elements": []}, None)
        assert _overpass_query("query") == {"elements": []}

    @patch("python_pkg.geo_data._common.time.sleep")
    @patch("python_pkg.geo_data._common._try_single_request")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_retries_then_succeeds(
        self, mock_stdout: MagicMock, mock_req: MagicMock, mock_sleep: MagicMock
    ) -> None:
        """A failed request followed by a successful one yields the payload."""
        first_failure = (None, ValueError("fail1"))
        then_success = ({"elements": []}, None)
        mock_req.side_effect = [first_failure, then_success]

        assert _overpass_query("query") == {"elements": []}

    @patch("python_pkg.geo_data._common.time.sleep")
    @patch("python_pkg.geo_data._common._try_single_request")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_all_endpoints_fail(
        self, mock_stdout: MagicMock, mock_req: MagicMock, mock_sleep: MagicMock
    ) -> None:
        """A RuntimeError is raised when every request fails."""
        mock_req.return_value = (None, ValueError("fail"))

        with pytest.raises(RuntimeError, match="All Overpass API endpoints failed"):
            _overpass_query("query")
|
|
||||||
|
|
||||||
|
|
||||||
class TestDownloadGithubGeojson:
    """Tests for _download_github_geojson."""

    @patch("python_pkg.geo_data._common.gpd.read_file")
    def test_cached_file_exists(self, mock_read: MagicMock) -> None:
        """An existing cache path is read with gpd.read_file and returned."""
        cached_frame = MagicMock()
        mock_read.return_value = cached_frame
        cache_path = MagicMock(**{"exists.return_value": True})

        loaded = _download_github_geojson("http://example.com/data.geojson", cache_path)

        assert loaded is cached_frame
        mock_read.assert_called_once_with(cache_path)

    @patch("python_pkg.geo_data._common.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._common._ensure_cache_dir")
    @patch("python_pkg.geo_data._common.requests.get")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_downloads_and_caches(
        self,
        mock_stdout: MagicMock,
        mock_get: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Without a cache file the frame built by from_features is returned."""
        point_feature: dict[str, Any] = {
            "type": "Feature",
            "properties": {"name": "test"},
            "geometry": {"type": "Point", "coordinates": [0, 0]},
        }
        features_data: dict[str, Any] = {"features": [point_feature]}
        mock_get.return_value.json.return_value = features_data

        built_frame = MagicMock()
        mock_from_features.return_value = built_frame

        cache_path = MagicMock(**{"exists.return_value": False})

        loaded = _download_github_geojson(
            "https://example.com/data.geojson", cache_path
        )

        assert loaded is built_frame

    def test_unsupported_url_scheme(self) -> None:
        """An ftp:// URL is rejected with a ValueError."""
        cache_path = MagicMock(**{"exists.return_value": False})

        with pytest.raises(ValueError, match="Unsupported URL scheme"):
            _download_github_geojson("ftp://example.com/data", cache_path)
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractOsiedlaRings:
    """Tests for _extract_osiedla_rings."""

    @staticmethod
    def _pts(*pairs: tuple[float, float]) -> list[dict[str, float]]:
        """Turn (lon, lat) pairs into Overpass-style geometry points."""
        return [{"lon": lon, "lat": lat} for lon, lat in pairs]

    def test_outer_and_inner_rings(self) -> None:
        """One outer and one inner member give one ring of each kind."""
        element: dict[str, Any] = {
            "members": [
                {
                    "role": "outer",
                    "geometry": self._pts((0, 0), (1, 0), (1, 1), (0, 1)),
                },
                {
                    "role": "inner",
                    "geometry": self._pts(
                        (0.2, 0.2), (0.4, 0.2), (0.4, 0.4), (0.2, 0.4)
                    ),
                },
            ]
        }
        outer, inner = _extract_osiedla_rings(element, 4)
        assert len(outer) == 1
        assert len(inner) == 1

    def test_ring_too_short(self) -> None:
        """A two-point outer member gives no rings when the minimum is 4."""
        element: dict[str, Any] = {
            "members": [{"role": "outer", "geometry": self._pts((0, 0), (1, 0))}]
        }
        outer, inner = _extract_osiedla_rings(element, 4)
        assert len(outer) == 0
        assert len(inner) == 0

    def test_no_geometry_in_member(self) -> None:
        """A member without geometry gives no rings."""
        element: dict[str, Any] = {"members": [{"role": "outer"}]}
        outer, inner = _extract_osiedla_rings(element, 4)
        assert len(outer) == 0
        assert len(inner) == 0

    def test_already_closed_ring(self) -> None:
        """An already closed ring is returned as one ring that is closed."""
        element: dict[str, Any] = {
            "members": [
                {
                    "role": "outer",
                    "geometry": self._pts((0, 0), (1, 0), (1, 1), (0, 0)),
                }
            ]
        }
        outer, _ = _extract_osiedla_rings(element, 4)
        assert len(outer) == 1
        # Already closed, so no extra point
        ring = outer[0]
        assert ring[0] == ring[-1]

    def test_no_members(self) -> None:
        """An element without a members key gives no rings."""
        element: dict[str, Any] = {}
        outer, inner = _extract_osiedla_rings(element, 4)
        assert len(outer) == 0
        assert len(inner) == 0

    def test_unknown_role_ignored(self) -> None:
        """A member with the role "label" gives no rings."""
        element: dict[str, Any] = {
            "members": [
                {
                    "role": "label",
                    "geometry": self._pts((0, 0), (1, 0), (1, 1), (0, 1)),
                }
            ]
        }
        outer, inner = _extract_osiedla_rings(element, 4)
        assert len(outer) == 0
        assert len(inner) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestBuildOsiedlaGeometry:
    """Tests for _build_osiedla_geometry."""

    def test_single_outer_ring(self) -> None:
        """One outer ring and no holes give a Polygon."""
        no_holes: list[list[tuple[float, float]]] = []
        shell = [(0, 0), (1, 0), (1, 1), (0, 0)]
        geometry = _build_osiedla_geometry([shell], no_holes)
        assert geometry["type"] == "Polygon"

    def test_single_outer_with_inner(self) -> None:
        """One outer ring plus one hole give a Polygon with two rings."""
        shell = [(0, 0), (1, 0), (1, 1), (0, 0)]
        hole = [(0.2, 0.2), (0.4, 0.2), (0.4, 0.4), (0.2, 0.2)]
        geometry = _build_osiedla_geometry([shell], [hole])
        assert geometry["type"] == "Polygon"
        assert len(geometry["coordinates"]) == 2

    def test_multiple_outer_rings(self) -> None:
        """Two outer rings give a MultiPolygon."""
        no_holes: list[list[tuple[float, float]]] = []
        shells = [
            [(0, 0), (1, 0), (1, 1), (0, 0)],
            [(2, 2), (3, 2), (3, 3), (2, 2)],
        ]
        geometry = _build_osiedla_geometry(shells, no_holes)
        assert geometry["type"] == "MultiPolygon"
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractPolygonFromElement:
    """Tests for _extract_polygon_from_element."""

    @staticmethod
    def _pts(*pairs: tuple[float, float]) -> list[dict[str, float]]:
        """Turn (lon, lat) pairs into Overpass-style geometry points."""
        return [{"lon": lon, "lat": lat} for lon, lat in pairs]

    def test_relation_with_rings(self) -> None:
        """A relation with an outer member gives a Polygon."""
        element: dict[str, Any] = {
            "type": "relation",
            "members": [
                {
                    "role": "outer",
                    "geometry": self._pts((0, 0), (1, 0), (1, 1), (0, 1)),
                }
            ],
        }
        geometry = _extract_polygon_from_element(element)
        assert geometry is not None
        assert geometry["type"] == "Polygon"

    def test_relation_without_outer_rings(self) -> None:
        """A relation holding only an inner member gives None."""
        element: dict[str, Any] = {
            "type": "relation",
            "members": [{"role": "inner", "geometry": self._pts((0, 0))}],
        }
        assert _extract_polygon_from_element(element) is None

    def test_way_with_enough_coords(self) -> None:
        """An open four-point way gives a Polygon with a closed ring."""
        element: dict[str, Any] = {
            "type": "way",
            "geometry": self._pts((0, 0), (1, 0), (1, 1), (0, 1)),
        }
        geometry = _extract_polygon_from_element(element)
        assert geometry is not None
        assert geometry["type"] == "Polygon"
        # Should close the ring
        ring = geometry["coordinates"][0]
        assert ring[0] == ring[-1]

    def test_way_already_closed(self) -> None:
        """An already closed way still gives a geometry."""
        element: dict[str, Any] = {
            "type": "way",
            "geometry": self._pts((0, 0), (1, 0), (1, 1), (0, 0)),
        }
        assert _extract_polygon_from_element(element) is not None

    def test_way_too_few_coords(self) -> None:
        """A two-point way gives None."""
        element: dict[str, Any] = {
            "type": "way",
            "geometry": self._pts((0, 0), (1, 0)),
        }
        assert _extract_polygon_from_element(element) is None

    def test_way_no_geometry(self) -> None:
        """A way without geometry gives None."""
        element: dict[str, Any] = {"type": "way"}
        assert _extract_polygon_from_element(element) is None

    def test_unknown_type(self) -> None:
        """A node element gives None."""
        element: dict[str, Any] = {"type": "node"}
        assert _extract_polygon_from_element(element) is None
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractLineFromWay:
    """Exercise _extract_line_from_way with valid and rejected elements."""

    def test_valid_way(self) -> None:
        """A two-point way is expected to come back as a LineString."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
        }
        line = _extract_line_from_way(way)
        assert line is not None
        assert line["type"] == "LineString"

    def test_too_few_coords(self) -> None:
        """A way with a single point is expected to give None."""
        single_point_way: dict[str, Any] = {
            "type": "way",
            "geometry": [{"lon": 0, "lat": 0}],
        }
        assert _extract_line_from_way(single_point_way) is None

    def test_not_a_way(self) -> None:
        """An element of type node is expected to give None."""
        node: dict[str, Any] = {"type": "node"}
        assert _extract_line_from_way(node) is None

    def test_way_no_geometry(self) -> None:
        """A way without a geometry key is expected to give None."""
        bare_way: dict[str, Any] = {"type": "way"}
        assert _extract_line_from_way(bare_way) is None
|
|
||||||
@ -1,54 +0,0 @@
|
|||||||
"""Tests for _add_area_column and _add_length_column (non-empty GDFs)."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import LineString, Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._common import _add_area_column, _add_length_column
|
|
||||||
|
|
||||||
|
|
||||||
class TestAddAreaColumnNonEmpty:
    """Tests for _add_area_column with non-empty GeoDataFrame."""

    def test_adds_area_column(self) -> None:
        """A one-polygon frame is expected to gain a positive ``area_km2`` value."""
        square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        frame = gpd.GeoDataFrame({"name": ["A"]}, geometry=[square], crs="EPSG:4326")
        with_area = _add_area_column(frame)
        assert "area_km2" in with_area.columns
        assert with_area["area_km2"].iloc[0] > 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestAddLengthColumnNonEmpty:
    """Tests for _add_length_column with non-empty GeoDataFrame."""

    def test_adds_length_column(self) -> None:
        """A one-line frame is expected to gain a positive ``length_km`` value."""
        segment = LineString([(20, 50), (21, 51)])
        frame = gpd.GeoDataFrame({"name": ["A"]}, geometry=[segment], crs="EPSG:4326")
        with_length = _add_length_column(frame)
        assert "length_km" in with_length.columns
        assert with_length["length_km"].iloc[0] > 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestAddAreaColumnEmpty:
    """Tests for _add_area_column with empty GeoDataFrame."""

    def test_returns_empty_gdf(self) -> None:
        """A frame with no rows is expected to come back with no rows."""
        empty_frame = gpd.GeoDataFrame({"name": [], "geometry": []})
        assert len(_add_area_column(empty_frame)) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestAddLengthColumnEmpty:
    """Tests for _add_length_column with empty GeoDataFrame."""

    def test_returns_empty_gdf(self) -> None:
        """A frame with no rows is expected to come back with no rows."""
        empty_frame = gpd.GeoDataFrame({"name": [], "geometry": []})
        assert len(_add_length_column(empty_frame)) == 0
|
|
||||||
@ -1,85 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data.__init__ module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
from python_pkg.geo_data import (
|
|
||||||
clear_cache,
|
|
||||||
download_all_poland_data,
|
|
||||||
download_all_warsaw_data,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestDownloadAllWarsawData:
    """Tests for download_all_warsaw_data."""

    def test_calls_all_warsaw_functions(self) -> None:
        """Each patched Warsaw getter must be invoked exactly once."""
        with (
            patch("python_pkg.geo_data.sys.stdout"),
            patch("python_pkg.geo_data.get_warsaw_boundary") as boundary,
            patch("python_pkg.geo_data.get_vistula_river") as vistula,
            patch("python_pkg.geo_data.get_warsaw_bridges") as bridges,
            patch("python_pkg.geo_data.get_warsaw_metro_stations") as metro,
            patch("python_pkg.geo_data.get_warsaw_streets") as streets,
            patch("python_pkg.geo_data.get_warsaw_landmarks") as landmarks,
            patch("python_pkg.geo_data.get_warsaw_osiedla") as osiedla,
        ):
            download_all_warsaw_data()
            # Same order as the patches above, so a failure points at one getter.
            getters = (boundary, vistula, bridges, metro, streets, landmarks, osiedla)
            for getter in getters:
                getter.assert_called_once()
|
|
||||||
|
|
||||||
|
|
||||||
class TestDownloadAllPolandData:
    """Tests for download_all_poland_data."""

    # Decorators apply bottom-up, so the mock arguments arrive in reverse order.
    @patch("python_pkg.geo_data.get_poland_boundary")
    @patch("python_pkg.geo_data.get_polish_gminy")
    @patch("python_pkg.geo_data.get_polish_powiaty")
    @patch("python_pkg.geo_data.get_polish_wojewodztwa")
    @patch("python_pkg.geo_data.sys.stdout")
    def test_calls_all_poland_functions(
        self,
        mock_stdout: MagicMock,
        mock_woj: MagicMock,
        mock_powiaty: MagicMock,
        mock_gminy: MagicMock,
        mock_boundary: MagicMock,
    ) -> None:
        """Each patched Poland getter must be invoked exactly once."""
        download_all_poland_data()
        for getter in (mock_woj, mock_powiaty, mock_gminy, mock_boundary):
            getter.assert_called_once()
|
|
||||||
|
|
||||||
|
|
||||||
class TestClearCache:
    """Tests for clear_cache."""

    @patch("python_pkg.geo_data.shutil.rmtree")
    @patch("python_pkg.geo_data.CACHE_DIR")
    @patch("python_pkg.geo_data.sys.stdout")
    def test_cache_exists(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_rmtree: MagicMock,
    ) -> None:
        """When the cache directory exists it is removed with a single rmtree call."""
        mock_cache_dir.exists.return_value = True
        clear_cache()
        mock_rmtree.assert_called_once_with(mock_cache_dir)

    # rmtree is patched here too: without it the test asserted nothing and a
    # regression would reach the real shutil.rmtree instead of failing cleanly.
    @patch("python_pkg.geo_data.shutil.rmtree")
    @patch("python_pkg.geo_data.CACHE_DIR")
    @patch("python_pkg.geo_data.sys.stdout")
    def test_cache_not_exists(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_rmtree: MagicMock,
    ) -> None:
        """When the cache directory is absent nothing must be removed."""
        mock_cache_dir.exists.return_value = False
        clear_cache()
        mock_rmtree.assert_not_called()
|
|
||||||
@ -1,313 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data._poland_admin module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._poland_admin import (
|
|
||||||
_get_powiaty_population,
|
|
||||||
_query_wikidata,
|
|
||||||
get_poland_boundary,
|
|
||||||
get_polish_gminy,
|
|
||||||
get_polish_powiaty,
|
|
||||||
get_polish_wojewodztwa,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestQueryWikidata:
    """Tests for _query_wikidata."""

    @patch("python_pkg.geo_data._poland_admin.requests.get")
    def test_successful_query(self, mock_get: MagicMock) -> None:
        """The bindings from the JSON payload are returned and the status is checked."""
        response = MagicMock()
        response.json.return_value = {
            "results": {"bindings": [{"name": {"value": "test"}}]}
        }
        mock_get.return_value = response

        bindings = _query_wikidata("SELECT ?x WHERE {}")

        assert bindings == [{"name": {"value": "test"}}]
        response.raise_for_status.assert_called_once()
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPowiatyPopulation:
    """Tests for _get_powiaty_population."""

    @staticmethod
    def _stub_cache_path(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Route ``cache_dir / name`` to a path mock whose ``exists()`` is fixed."""
        cache_path = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_path)
        cache_path.exists.return_value = exists
        return cache_path

    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock) -> None:
        """An existing cache file is read back as the population mapping."""
        cache_path = self._stub_cache_path(mock_cache_dir, exists=True)
        cache_path.read_text.return_value = json.dumps({"Kraków": 780000})

        assert _get_powiaty_population() == {"Kraków": 780000}

    @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_admin._query_wikidata")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_admin.sys.stdout")
    def test_downloads_and_caches(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """Without a cache file the query rows are parsed and written out once."""
        cache_path = self._stub_cache_path(mock_cache_dir, exists=False)

        valid_row = {
            "powiatLabel": {"value": "powiat krakowski"},
            "population": {"value": "100000"},
        }
        non_numeric_row = {
            "powiatLabel": {"value": "powiat wadowicki"},
            "population": {"value": "bad_value"},
        }
        empty_label_row = {
            "powiatLabel": {"value": ""},
            "population": {"value": "50000"},
        }
        missing_label_row = {
            "population": {"value": "30000"},
        }
        mock_query.return_value = [
            valid_row,
            non_numeric_row,
            empty_label_row,
            missing_label_row,
        ]

        populations = _get_powiaty_population()

        assert "krakowski" in populations
        cache_path.write_text.assert_called_once()

    @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_admin._query_wikidata")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_admin.sys.stdout")
    def test_empty_label_skipped(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """A single row with an empty label leaves the mapping empty."""
        self._stub_cache_path(mock_cache_dir, exists=False)
        mock_query.return_value = [
            {"powiatLabel": {"value": ""}, "population": {"value": "1000"}},
        ]

        assert len(_get_powiaty_population()) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishWojewodztwa:
    """Tests for get_polish_wojewodztwa."""

    @patch("python_pkg.geo_data._poland_admin._download_github_geojson")
    def test_returns_geodataframe(self, mock_download: MagicMock) -> None:
        """The frame produced by the patched downloader is returned as-is."""
        downloaded = MagicMock(spec=gpd.GeoDataFrame)
        mock_download.return_value = downloaded

        assert get_polish_wojewodztwa() is downloaded
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishPowiaty:
    """Tests for get_polish_powiaty."""

    @patch("python_pkg.geo_data._poland_admin._get_powiaty_population")
    @patch("python_pkg.geo_data._poland_admin._download_github_geojson")
    def test_with_population(
        self, mock_download: MagicMock, mock_pop: MagicMock
    ) -> None:
        """Population values are attached to the rows whose names match."""
        unit_square = [(0, 0), (1, 0), (1, 1), (0, 1)]
        names = ["powiat krakowski", "powiat Wadowice", "powiat xyz", ""]
        mock_download.return_value = gpd.GeoDataFrame(
            {"nazwa": names},
            geometry=[Polygon(unit_square) for _ in names],
            crs="EPSG:4326",
        )
        mock_pop.return_value = {"krakowski": 100000, "wadowice": 50000}

        powiaty = get_polish_powiaty()

        assert "population" in powiaty.columns
        # "krakowski" is present in the population mapping verbatim.
        assert powiaty.iloc[0]["population"] == 100000
        # "Wadowice" only matches the mapping key when case is ignored.
        assert powiaty.iloc[1]["population"] == 50000
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishGminy:
    """Tests for get_polish_gminy."""

    @staticmethod
    def _stub_cache_path(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Route ``cache_dir / name`` to a path mock whose ``exists()`` is fixed."""
        cache_path = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_path)
        cache_path.exists.return_value = exists
        return cache_path

    @staticmethod
    def _square_ring(origin: int) -> list[dict[str, int]]:
        """Return lon/lat corners of the unit square starting at (origin, origin)."""
        far = origin + 1
        return [
            {"lon": origin, "lat": origin},
            {"lon": far, "lat": origin},
            {"lon": far, "lat": far},
            {"lon": origin, "lat": far},
        ]

    @patch("python_pkg.geo_data._poland_admin.gpd.read_file")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame that already has ``area_km2`` keeps 200.0 in row 0."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {
                "name": ["A", "B"],
                "area_km2": [200.0, 100.0],
            },
            geometry=[
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]),
            ],
            crs="EPSG:4326",
        )

        gminy = get_polish_gminy()

        assert gminy.iloc[0]["area_km2"] == 200.0

    @patch("python_pkg.geo_data._poland_admin.gpd.read_file")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame lacking ``area_km2`` still yields its single row."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["A"]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )

        assert len(get_polish_gminy()) == 1

    def test_downloads_from_osm(self) -> None:
        """Without a cache file the Overpass elements are turned into a frame."""
        # NOTE(review): _add_area_column is patched in _common rather than in
        # _poland_admin; confirm that is the namespace the code under test uses.
        with (
            patch("python_pkg.geo_data._poland_admin.sys.stdout"),
            patch("python_pkg.geo_data._poland_admin.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_admin._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_admin._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_admin.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch("python_pkg.geo_data._common._add_area_column") as mock_add_area,
        ):
            self._stub_cache_path(mock_cache_dir, exists=False)

            first_gmina = {
                "type": "relation",
                "tags": {"name": "Gmina A"},
                "members": [{"role": "outer", "geometry": self._square_ring(0)}],
            }
            # Duplicate name - should be skipped
            duplicate_gmina = {
                "type": "relation",
                "tags": {"name": "Gmina A"},
                "members": [{"role": "outer", "geometry": self._square_ring(2)}],
            }
            # Not a relation - should be skipped
            plain_way = {"type": "way", "tags": {"name": "Way"}}
            # No name
            unnamed_relation = {"type": "relation", "tags": {}}
            # No outer rings
            memberless_relation = {
                "type": "relation",
                "tags": {"name": "Empty"},
                "members": [],
            }
            mock_query.return_value = {
                "elements": [
                    first_gmina,
                    duplicate_gmina,
                    plain_way,
                    unnamed_relation,
                    memberless_relation,
                ]
            }

            built_frame = gpd.GeoDataFrame(
                {"name": ["Gmina A"], "area_km2": [100.0]},
                geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
                crs="EPSG:4326",
            )
            mock_from_features.return_value = built_frame
            mock_add_area.return_value = built_frame

            assert len(get_polish_gminy()) == 1
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolandBoundary:
    """Tests for get_poland_boundary."""

    @staticmethod
    def _stub_cache_path(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Route ``cache_dir / name`` to a path mock whose ``exists()`` is fixed."""
        cache_path = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_path)
        cache_path.exists.return_value = exists
        return cache_path

    @patch("python_pkg.geo_data._poland_admin.gpd.read_file")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With a cache file present the frame from read_file is returned as-is."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        cached = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached

        assert get_poland_boundary() is cached

    @patch("python_pkg.geo_data._poland_admin.gpd.GeoDataFrame.to_file")
    @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_admin.get_polish_wojewodztwa")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_dissolves_from_wojewodztwa(
        self,
        mock_cache_dir: MagicMock,
        mock_woj: MagicMock,
        mock_ensure: MagicMock,
        mock_to_file: MagicMock,
    ) -> None:
        """Without a cache file two adjacent voivodeships collapse into one row."""
        self._stub_cache_path(mock_cache_dir, exists=False)

        west = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        east = Polygon([(1, 0), (2, 0), (2, 1), (1, 1)])
        mock_woj.return_value = gpd.GeoDataFrame(
            {"name": ["woj1", "woj2"]},
            geometry=[west, east],
            crs="EPSG:4326",
        )

        assert len(get_poland_boundary()) == 1
|
|
||||||
@ -1,385 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data._poland_nature module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
import pytest
|
|
||||||
from shapely.geometry import Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._poland_nature import (
|
|
||||||
get_polish_mountain_peaks,
|
|
||||||
get_polish_mountain_ranges,
|
|
||||||
get_polish_national_parks,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
|
|
||||||
"""Create a mock OSM relation element."""
|
|
||||||
members = []
|
|
||||||
if include_outer:
|
|
||||||
members.append(
|
|
||||||
{
|
|
||||||
"role": "outer",
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 0, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 1},
|
|
||||||
{"lon": 0, "lat": 1},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return {"type": "relation", "tags": {"name": name}, "members": members}
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishMountainPeaks:
    """Tests for get_polish_mountain_peaks."""

    @staticmethod
    def _stub_cache_path(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Route ``cache_dir / name`` to a path mock whose ``exists()`` is fixed."""
        cache_path = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_path)
        cache_path.exists.return_value = exists
        return cache_path

    @staticmethod
    def _peak_node(tags: dict[str, str]) -> dict[str, Any]:
        """Return a node element at lon 20.0 / lat 49.0 carrying ``tags``."""
        return {"type": "node", "tags": tags, "lon": 20.0, "lat": 49.0}

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With a cache file present the first row keeps elevation 2499.0."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Rysy", "Babia Góra"], "elevation": [2499.0, 1725.0]},
            geometry=[
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]),
            ],
            crs="EPSG:4326",
        )

        peaks = get_polish_mountain_peaks()

        assert peaks.iloc[0]["elevation"] == 2499.0

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_peaks(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Without a cache file a mixed Overpass payload is processed."""
        self._stub_cache_path(mock_cache_dir, exists=False)

        mock_query.return_value = {
            "elements": [
                self._peak_node({"name": "Rysy", "ele": "2499"}),
                # Below threshold
                self._peak_node({"name": "LowPeak", "ele": "100"}),
                # Missing ele
                self._peak_node({"name": "NoEle"}),
                # Duplicate name
                self._peak_node({"name": "Rysy", "ele": "2499"}),
                # Not a node
                {
                    "type": "way",
                    "tags": {"name": "Way", "ele": "500"},
                },
                # No name
                self._peak_node({"ele": "500"}),
                # Comma in ele
                self._peak_node({"name": "Peak2", "ele": "500,5 m"}),
                # Invalid ele
                self._peak_node({"name": "BadEle", "ele": "abc"}),
            ]
        }

        mock_from_features.return_value = gpd.GeoDataFrame(
            {"name": ["Rysy", "Peak2"], "elevation": [2499.0, 500.5]},
            geometry=[
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]),
            ],
            crs="EPSG:4326",
        )

        peaks = get_polish_mountain_peaks()

        assert peaks.iloc[0]["elevation"] == 2499.0

    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_no_peaks_raises(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """An empty Overpass payload must raise ValueError."""
        self._stub_cache_path(mock_cache_dir, exists=False)
        mock_query.return_value = {"elements": []}

        with pytest.raises(ValueError, match="No mountain peaks found"):
            get_polish_mountain_peaks()
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishMountainRanges:
    """Tests for get_polish_mountain_ranges."""

    @staticmethod
    def _stub_cache_path(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Route ``cache_dir / name`` to a path mock whose ``exists()`` is fixed."""
        cache_path = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_path)
        cache_path.exists.return_value = exists
        return cache_path

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
    ) -> None:
        """A cached frame that already has ``area_km2`` keeps that column."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Tatry"], "area_km2": [100.0]},
            geometry=[square],
            crs="EPSG:4326",
        )

        ranges = get_polish_mountain_ranges()

        assert "area_km2" in ranges.columns

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
    ) -> None:
        """A cached frame lacking ``area_km2`` is processed without raising."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Tatry"]},
            geometry=[square],
            crs="EPSG:4326",
        )

        ranges = get_polish_mountain_ranges()

        # NOTE(review): len() is never negative, so this only proves the call
        # did not raise; consider asserting the expected row count instead.
        assert len(ranges) >= 0

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_ranges(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Without a cache file a mixed Overpass payload is processed."""
        self._stub_cache_path(mock_cache_dir, exists=False)

        # Way with enough coords
        open_way = {
            "type": "way",
            "tags": {"name": "Bieszczady"},
            "geometry": [
                {"lon": 0, "lat": 0},
                {"lon": 1, "lat": 0},
                {"lon": 1, "lat": 1},
                {"lon": 0, "lat": 1},
            ],
        }
        # Way with auto-close
        auto_close_way = {
            "type": "way",
            "tags": {"name": "Karkonosze"},
            "geometry": [
                {"lon": 0, "lat": 0},
                {"lon": 1, "lat": 0},
                {"lon": 1, "lat": 1},
                {"lon": 0, "lat": 0.5},
            ],
        }
        # Way already closed (first == last)
        closed_way = {
            "type": "way",
            "tags": {"name": "Sudety"},
            "geometry": [
                {"lon": 2, "lat": 2},
                {"lon": 3, "lat": 2},
                {"lon": 3, "lat": 3},
                {"lon": 2, "lat": 2},
            ],
        }
        # Way too few coords
        short_way = {
            "type": "way",
            "tags": {"name": "Short"},
            "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 0}],
        }
        mock_query.return_value = {
            "elements": [
                # Relation
                _make_relation_element("Tatry"),
                open_way,
                auto_close_way,
                closed_way,
                short_way,
                # Duplicate
                _make_relation_element("Tatry"),
                # No name
                _make_relation_element(""),
                # Unknown type
                {"type": "node", "tags": {"name": "Ignored"}},
                # Way without geometry
                {"type": "way", "tags": {"name": "NoGeom"}},
                # Relation without outer rings
                _make_relation_element("NoOuter", include_outer=False),
            ]
        }

        square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_from_features.return_value = gpd.GeoDataFrame(
            {"name": ["Tatry", "Bieszczady", "Karkonosze", "Sudety"]},
            geometry=[square, square, square, square],
            crs="EPSG:4326",
        )

        ranges = get_polish_mountain_ranges()

        # NOTE(review): len() is never negative, so this only proves the call
        # did not raise; consider asserting the expected row count instead.
        assert len(ranges) >= 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishNationalParks:
    """Tests for get_polish_national_parks."""

    @staticmethod
    def _stub_cache_path(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Route ``cache_dir / name`` to a path mock whose ``exists()`` is fixed."""
        cache_path = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_path)
        cache_path.exists.return_value = exists
        return cache_path

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame that already has ``area_km2`` keeps 200.0 in row 0."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Tatrzański Park Narodowy"], "area_km2": [200.0]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )

        parks = get_polish_national_parks()

        assert parks.iloc[0]["area_km2"] == 200.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame lacking ``area_km2`` still yields its single row."""
        self._stub_cache_path(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Tatrzański Park Narodowy"]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )

        assert len(get_polish_national_parks()) == 1

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_parks(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Without a cache file a mixed Overpass payload is processed."""
        self._stub_cache_path(mock_cache_dir, exists=False)

        mock_query.return_value = {
            "elements": [
                _make_relation_element("Tatrzański Park Narodowy"),
                # Not a national park (missing "Narodowy")
                _make_relation_element("Some Reserve"),
                # Not a relation
                {"type": "way", "tags": {"name": "Park Narodowy X"}},
                # No name
                {"type": "relation", "tags": {}, "members": []},
                # Duplicate
                _make_relation_element("Tatrzański Park Narodowy"),
                # No outer rings
                _make_relation_element("Empty Park Narodowy", include_outer=False),
                # Case insensitive match
                _make_relation_element("park narodowy Biebrzy"),
            ]
        }

        square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_from_features.return_value = gpd.GeoDataFrame(
            {"name": ["Tatrzański Park Narodowy", "park narodowy Biebrzy"]},
            geometry=[square, square],
            crs="EPSG:4326",
        )

        parks = get_polish_national_parks()

        # NOTE(review): len() is never negative, so this only proves the call
        # did not raise; consider asserting the expected row count instead.
        assert len(parks) >= 0
|
|
||||||
@ -1,418 +0,0 @@
|
|||||||
"""Tests for forests, nature reserves, and landscape parks download paths."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._poland_nature import (
|
|
||||||
get_polish_forests,
|
|
||||||
get_polish_landscape_parks,
|
|
||||||
get_polish_nature_reserves,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
|
|
||||||
"""Create a mock OSM relation element."""
|
|
||||||
members = []
|
|
||||||
if include_outer:
|
|
||||||
members.append(
|
|
||||||
{
|
|
||||||
"role": "outer",
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 0, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 1},
|
|
||||||
{"lon": 0, "lat": 1},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return {"type": "relation", "tags": {"name": name}, "members": members}
|
|
||||||
|
|
||||||
|
|
||||||
# Shared one-by-one-degree square used as the geometry of the mocked
# GeoDataFrames in the test classes of this module.
_POLY = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishForests:
    """Tests for get_polish_forests (cached path and download path)."""

    @staticmethod
    def _ring(origin: int) -> list[dict[str, int]]:
        """Return the four corners of a unit square whose low corner is ``origin``."""
        far = origin + 1
        return [
            {"lon": origin, "lat": origin},
            {"lon": far, "lat": origin},
            {"lon": far, "lat": far},
            {"lon": origin, "lat": far},
        ]

    @staticmethod
    def _way(name: str, geometry: list[dict[str, int]]) -> dict[str, Any]:
        """Return a named OSM-style ``way`` element with the given geometry."""
        return {"type": "way", "tags": {"name": name}, "geometry": geometry}

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame with ``area_km2``: row 0 reports 600.0."""
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Puszcza Białowieska"], "area_km2": [600.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        forests = get_polish_forests()
        assert forests.iloc[0]["area_km2"] == 600.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame lacking ``area_km2``: one row comes back."""
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Puszcza Białowieska"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        forests = get_polish_forests()
        assert len(forests) == 1

    def test_downloads_forests(self) -> None:
        """Missing cache path with a mixed Overpass payload: two rows come back."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)

            overpass.return_value = {
                "elements": [
                    # Valid forest with keyword
                    self._way("Puszcza Białowieska", self._ring(0)),
                    # Bory keyword
                    self._way("Bory Tucholskie", self._ring(2)),
                    # No forest keyword -> skip
                    self._way("Random Wood", self._ring(0)),
                    # Duplicate
                    self._way("Puszcza Białowieska", self._ring(0)),
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Geometry extraction fails (too few coords)
                    self._way("Las Mały", [{"lon": 0, "lat": 0}]),
                ]
            }

            features_frame = gpd.GeoDataFrame(
                {"name": ["Puszcza Białowieska", "Bory Tucholskie"]},
                geometry=[_POLY, _POLY],
                crs="EPSG:4326",
            )
            from_features.return_value = features_frame

            # The patched area helper hands back a copy that carries areas.
            frame_with_area = features_frame.copy()
            frame_with_area["area_km2"] = [600.0, 300.0]
            add_area.return_value = frame_with_area

            forests = get_polish_forests()
            assert len(forests) == 2

    def test_downloads_forests_empty(self) -> None:
        """Missing cache path and an empty Overpass payload: zero rows come back."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            overpass.return_value = {"elements": []}

            no_rows = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = no_rows
            add_area.return_value = no_rows

            forests = get_polish_forests()
            assert len(forests) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishNatureReserves:
    """Tests for get_polish_nature_reserves (cached path and download path)."""

    @staticmethod
    def _unit_ring() -> list[dict[str, int]]:
        """Return a fresh list with the four corners of the unit square."""
        return [
            {"lon": 0, "lat": 0},
            {"lon": 1, "lat": 0},
            {"lon": 1, "lat": 1},
            {"lon": 0, "lat": 1},
        ]

    @staticmethod
    def _way(name: str, geometry: list[dict[str, int]]) -> dict[str, Any]:
        """Return a named OSM-style ``way`` element with the given geometry."""
        return {"type": "way", "tags": {"name": name}, "geometry": geometry}

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame with ``area_km2``: row 0 reports 50.0."""
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Rezerwat X"], "area_km2": [50.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        reserves = get_polish_nature_reserves()
        assert reserves.iloc[0]["area_km2"] == 50.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame lacking ``area_km2``: one row comes back."""
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Rezerwat X"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        reserves = get_polish_nature_reserves()
        assert len(reserves) == 1

    def test_downloads_reserves(self) -> None:
        """Missing cache path with a mixed Overpass payload: one row comes back."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)

            overpass.return_value = {
                "elements": [
                    self._way("Rezerwat A", self._unit_ring()),
                    # Duplicate
                    self._way("Rezerwat A", self._unit_ring()),
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Geometry fails
                    self._way("Tiny", [{"lon": 0, "lat": 0}]),
                ]
            }

            features_frame = gpd.GeoDataFrame(
                {"name": ["Rezerwat A"]},
                geometry=[_POLY],
                crs="EPSG:4326",
            )
            from_features.return_value = features_frame

            # The patched area helper hands back a copy that carries an area.
            frame_with_area = features_frame.copy()
            frame_with_area["area_km2"] = [50.0]
            add_area.return_value = frame_with_area

            reserves = get_polish_nature_reserves()
            assert len(reserves) == 1

    def test_downloads_reserves_empty(self) -> None:
        """Missing cache path and an empty Overpass payload: zero rows come back."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            overpass.return_value = {"elements": []}

            no_rows = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = no_rows
            add_area.return_value = no_rows

            reserves = get_polish_nature_reserves()
            assert len(reserves) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishLandscapeParks:
    """Tests for get_polish_landscape_parks (cached path and download path)."""

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame with ``area_km2``: row 0 reports 100.0."""
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Park Krajobrazowy X"], "area_km2": [100.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        landscape_parks = get_polish_landscape_parks()
        assert landscape_parks.iloc[0]["area_km2"] == 100.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame lacking ``area_km2``: one row comes back."""
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Park Krajobrazowy X"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        landscape_parks = get_polish_landscape_parks()
        assert len(landscape_parks) == 1

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_landscape_parks(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Missing cache path with a mixed Overpass payload: one row comes back."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        non_relation = {
            "type": "way",
            "tags": {"name": "Park Krajobrazowy B"},
            "geometry": [],
        }
        overpass_elements = [
            _make_relation_element("Park Krajobrazowy A"),
            # Not a relation -> skip
            non_relation,
            # No name
            {"type": "relation", "tags": {}, "members": []},
            # Duplicate
            _make_relation_element("Park Krajobrazowy A"),
            # No outer rings
            _make_relation_element("Park Empty", include_outer=False),
        ]
        mock_query.return_value = {"elements": overpass_elements}

        features_frame = gpd.GeoDataFrame(
            {"name": ["Park Krajobrazowy A"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        mock_from_features.return_value = features_frame

        landscape_parks = get_polish_landscape_parks()
        assert len(landscape_parks) == 1

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_landscape_parks_empty(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Missing cache path and an empty Overpass payload: zero rows come back."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_query.return_value = {"elements": []}

        no_rows = gpd.GeoDataFrame({"name": [], "geometry": []})
        mock_from_features.return_value = no_rows

        landscape_parks = get_polish_landscape_parks()
        assert len(landscape_parks) == 0
|
|
||||||
@ -1,466 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data._poland_water module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._poland_water import (
|
|
||||||
_extract_coastal_geometry,
|
|
||||||
_extract_river_coords_from_element,
|
|
||||||
get_polish_lakes,
|
|
||||||
get_polish_rivers,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractCoastalGeometry:
    """Tests for _extract_coastal_geometry."""

    # Third argument passed to every call in this class.
    _LINE_TYPES = ("cliff", "beach")

    @staticmethod
    def _points(*pairs: tuple[float, float]) -> list[dict[str, float]]:
        """Turn ``(lon, lat)`` pairs into the ``{"lon": .., "lat": ..}`` dicts."""
        return [{"lon": lon, "lat": lat} for lon, lat in pairs]

    def test_relation_delegated(self) -> None:
        """A relation with one outer square member yields a non-None result."""
        relation: dict[str, Any] = {
            "type": "relation",
            "members": [
                {
                    "role": "outer",
                    "geometry": self._points((0, 0), (1, 0), (1, 1), (0, 1)),
                }
            ],
        }
        geometry = _extract_coastal_geometry(relation, "peninsula", self._LINE_TYPES)
        assert geometry is not None

    def test_way_line_type(self) -> None:
        """A two-point way of type ``cliff`` yields a ``LineString``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0), (1, 1)),
        }
        geometry = _extract_coastal_geometry(way, "cliff", self._LINE_TYPES)
        assert geometry is not None
        assert geometry["type"] == "LineString"

    def test_way_polygon_type(self) -> None:
        """A four-point way of type ``peninsula`` yields a ``Polygon``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0), (1, 0), (1, 1), (0, 1)),
        }
        geometry = _extract_coastal_geometry(way, "peninsula", self._LINE_TYPES)
        assert geometry is not None
        assert geometry["type"] == "Polygon"

    def test_way_polygon_auto_close(self) -> None:
        """An open four-point ring comes back with equal first and last points."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0), (1, 0), (1, 1), (0, 0.5)),
        }
        geometry = _extract_coastal_geometry(way, "peninsula", self._LINE_TYPES)
        assert geometry is not None
        ring = geometry["coordinates"][0]
        assert ring[0] == ring[-1]

    def test_way_polygon_already_closed(self) -> None:
        """A ring whose last point repeats the first keeps its four points."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0), (1, 0), (1, 1), (0, 0)),
        }
        geometry = _extract_coastal_geometry(way, "peninsula", self._LINE_TYPES)
        assert geometry is not None
        assert geometry["type"] == "Polygon"
        assert len(geometry["coordinates"][0]) == 4

    def test_way_too_short_for_polygon_not_line(self) -> None:
        """A three-point way of a non-line type yields ``None``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0), (1, 0), (1, 1)),
        }
        # 3 coords, >= MIN_LINE_COORDS but < MIN_RING_COORDS for polygon
        geometry = _extract_coastal_geometry(way, "peninsula", self._LINE_TYPES)
        # 3 coords is not enough for ring (need 4), so returns None
        assert geometry is None

    def test_way_too_few_coords(self) -> None:
        """A single-point way yields ``None``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0)),
        }
        geometry = _extract_coastal_geometry(way, "cliff", self._LINE_TYPES)
        assert geometry is None

    def test_not_way_or_relation(self) -> None:
        """A ``node`` element yields ``None``."""
        node: dict[str, Any] = {"type": "node"}
        geometry = _extract_coastal_geometry(node, "cliff", self._LINE_TYPES)
        assert geometry is None

    def test_way_no_geometry(self) -> None:
        """A way without a ``geometry`` key yields ``None``."""
        way: dict[str, Any] = {"type": "way"}
        geometry = _extract_coastal_geometry(way, "cliff", self._LINE_TYPES)
        assert geometry is None
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractRiverCoordsFromElement:
    """Tests for _extract_river_coords_from_element."""

    @staticmethod
    def _points(*pairs: tuple[int, int]) -> list[dict[str, int]]:
        """Turn ``(lon, lat)`` pairs into the ``{"lon": .., "lat": ..}`` dicts."""
        return [{"lon": lon, "lat": lat} for lon, lat in pairs]

    def test_way_element(self) -> None:
        """A two-point way produces exactly one entry."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0), (1, 1)),
        }
        segments = _extract_river_coords_from_element(way)
        assert len(segments) == 1

    def test_way_too_few_coords(self) -> None:
        """A single-point way produces no entries."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._points((0, 0)),
        }
        segments = _extract_river_coords_from_element(way)
        assert len(segments) == 0

    def test_relation_element(self) -> None:
        """Of five relation members, only the two valid ways produce entries."""
        members = [
            {"type": "way", "geometry": self._points((0, 0), (1, 1))},
            {"type": "way", "geometry": self._points((1, 1), (2, 2))},
            # Too few coords
            {"type": "way", "geometry": self._points((0, 0))},
            # Not a way
            {"type": "node", "geometry": self._points((0, 0), (1, 1))},
            # No geometry
            {"type": "way"},
        ]
        relation: dict[str, Any] = {"type": "relation", "members": members}
        segments = _extract_river_coords_from_element(relation)
        assert len(segments) == 2

    def test_unknown_type(self) -> None:
        """A ``node`` element produces no entries."""
        node: dict[str, Any] = {"type": "node"}
        segments = _extract_river_coords_from_element(node)
        assert len(segments) == 0

    def test_way_no_geometry(self) -> None:
        """A way without a ``geometry`` key produces no entries."""
        way: dict[str, Any] = {"type": "way"}
        segments = _extract_river_coords_from_element(way)
        assert len(segments) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishLakes:
    """Tests for get_polish_lakes (cached path and download path)."""

    @staticmethod
    def _unit_ring() -> list[dict[str, int]]:
        """Return a fresh list with the four corners of the unit square."""
        return [
            {"lon": 0, "lat": 0},
            {"lon": 1, "lat": 0},
            {"lon": 1, "lat": 1},
            {"lon": 0, "lat": 1},
        ]

    @staticmethod
    def _way(name: str, geometry: list[dict[str, int]]) -> dict[str, Any]:
        """Return a named OSM-style ``way`` element with the given geometry."""
        return {"type": "way", "tags": {"name": name}, "geometry": geometry}

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame with ``area_km2``: row 0 reports 113.0."""
        unit_square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Śniardwy"], "area_km2": [113.0]},
            geometry=[unit_square],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        lakes = get_polish_lakes()
        assert lakes.iloc[0]["area_km2"] == 113.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Existing cache path and a frame lacking ``area_km2``: one row comes back."""
        unit_square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        cached_frame = gpd.GeoDataFrame(
            {"name": ["Śniardwy"]},
            geometry=[unit_square],
            crs="EPSG:4326",
        )
        mock_read.return_value = cached_frame

        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        lakes = get_polish_lakes()
        assert len(lakes) == 1

    def test_downloads_lakes(self) -> None:
        """Missing cache path: feed a mixed Overpass payload through the call."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)

            overpass.return_value = {
                "elements": [
                    self._way("Śniardwy", self._unit_ring()),
                    # Duplicate
                    self._way("Śniardwy", self._unit_ring()),
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Geometry extraction fails
                    self._way("Tiny", [{"lon": 0, "lat": 0}]),
                ]
            }

            square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
            features_frame = gpd.GeoDataFrame(
                {"name": ["Śniardwy"]},
                geometry=[square],
                crs="EPSG:4326",
            )
            from_features.return_value = features_frame

            # The patched area helper hands back a copy that carries an area.
            frame_with_area = features_frame.copy()
            frame_with_area["area_km2"] = [113.0]
            add_area.return_value = frame_with_area

            lakes = get_polish_lakes()
            # NOTE(review): ``len(...) >= 0`` holds for any sized object, so this
            # only shows the call returned something with a length.
            assert len(lakes) >= 0

    def test_empty_result(self) -> None:
        """Missing cache path and an empty Overpass payload: zero rows come back."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            overpass.return_value = {"elements": []}

            no_rows = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = no_rows
            add_area.return_value = no_rows

            lakes = get_polish_lakes()
            assert len(lakes) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishRivers:
    """Tests for get_polish_rivers."""

    @staticmethod
    def _stub_cache_file(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` return a mock path.

        Args:
            cache_dir: The patched ``CACHE_DIR`` mock.
            exists: Value the mock path's ``exists()`` should report.

        Returns:
            The mock path object handed out by the ``/`` operator.
        """
        cache_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cache_file.exists.return_value = exists
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_length(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame that already has ``length_km`` exposes that value."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Wisła"], "length_km": [1047.0]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )

        rivers = get_polish_rivers()

        assert rivers.iloc[0]["length_km"] == 1047.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_length(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame lacking ``length_km`` still yields its single row."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Wisła"]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )

        rivers = get_polish_rivers()

        assert len(rivers) == 1

    def test_downloads_rivers(self) -> None:
        """With no cache file, the mocked Overpass payload is consumed."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_length_column"
            ) as mock_add_length,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)

            mock_query.return_value = {
                "elements": [
                    # Way with wikidata
                    {
                        "type": "way",
                        "id": 1,
                        "tags": {"name": "Wisła", "wikidata": "Q54"},
                        "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
                    },
                    # Way without wikidata
                    {
                        "type": "way",
                        "id": 2,
                        "tags": {"name": "Odra"},
                        "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
                    },
                    # Relation made of two member ways
                    {
                        "type": "relation",
                        "id": 3,
                        "tags": {"name": "Bug", "wikidata": "Q55"},
                        "members": [
                            {
                                "type": "way",
                                "geometry": [
                                    {"lon": 0, "lat": 0},
                                    {"lon": 1, "lat": 1},
                                ],
                            },
                            {
                                "type": "way",
                                "geometry": [
                                    {"lon": 1, "lat": 1},
                                    {"lon": 2, "lat": 2},
                                ],
                            },
                        ],
                    },
                    # No name
                    {
                        "type": "way",
                        "id": 4,
                        "tags": {},
                        "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
                    },
                    # Way with a single coordinate only
                    {
                        "type": "way",
                        "id": 5,
                        "tags": {"name": "Short"},
                        "geometry": [{"lon": 0, "lat": 0}],
                    },
                ]
            }

            square = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
            features_gdf = gpd.GeoDataFrame(
                {"name": ["Wisła", "Odra", "Bug"]},
                geometry=[square, square, square],
                crs="EPSG:4326",
            )
            mock_from_features.return_value = features_gdf
            measured_gdf = features_gdf.copy()
            measured_gdf["length_km"] = [1047.0, 854.0, 772.0]
            mock_add_length.return_value = measured_gdf

            rivers = get_polish_rivers()

            # The previous check (`len(result) >= 0`) could never fail. Assert
            # something observable instead: the download path really queried
            # Overpass and handed back a GeoDataFrame.
            # NOTE(review): the exact row count depends on filtering inside
            # get_polish_rivers, which is not visible here — tighten if known.
            mock_query.assert_called()
            assert isinstance(rivers, gpd.GeoDataFrame)

    def test_empty_result(self) -> None:
        """An Overpass response with no elements produces an empty result."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_length_column"
            ) as mock_add_length,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)
            mock_query.return_value = {"elements": []}

            empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []})
            mock_from_features.return_value = empty_gdf
            mock_add_length.return_value = empty_gdf

            rivers = get_polish_rivers()

            assert len(rivers) == 0
|
|
||||||
@ -1,397 +0,0 @@
|
|||||||
"""Tests for islands, coastal features, and UNESCO sites download paths."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._poland_water import (
|
|
||||||
get_polish_coastal_features,
|
|
||||||
get_polish_islands,
|
|
||||||
get_polish_unesco_sites,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
|
|
||||||
"""Create a mock OSM relation element."""
|
|
||||||
members = []
|
|
||||||
if include_outer:
|
|
||||||
members.append(
|
|
||||||
{
|
|
||||||
"role": "outer",
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 0, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 1},
|
|
||||||
{"lon": 0, "lat": 1},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return {"type": "relation", "tags": {"name": name}, "members": members}
|
|
||||||
|
|
||||||
|
|
||||||
# Square polygon shared by the tests in this module as a stand-in geometry
# for the GeoDataFrame fixtures.
_POLY = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishIslands:
    """Tests for get_polish_islands."""

    @staticmethod
    def _stub_cache_file(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` return a mock path with fixed ``exists()``."""
        cache_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cache_file.exists.return_value = exists
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame that already has ``area_km2`` exposes that value."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Wolin"], "area_km2": [265.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )

        islands = get_polish_islands()

        assert islands.iloc[0]["area_km2"] == 265.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame lacking ``area_km2`` still yields its single row."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Wolin"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )

        islands = get_polish_islands()

        assert len(islands) == 1

    def test_downloads_islands(self) -> None:
        """With no cache file, a mixed Overpass payload yields one island row."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_area_column"
            ) as mock_add_area,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)

            mock_query.return_value = {
                "elements": [
                    # Named way with a four-point outline
                    {
                        "type": "way",
                        "tags": {"name": "Wolin"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 0},
                            {"lon": 1, "lat": 1},
                            {"lon": 0, "lat": 1},
                        ],
                    },
                    # Duplicate of the entry above
                    {
                        "type": "way",
                        "tags": {"name": "Wolin"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 0},
                            {"lon": 1, "lat": 1},
                            {"lon": 0, "lat": 1},
                        ],
                    },
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Single coordinate: geometry cannot be built
                    {
                        "type": "way",
                        "tags": {"name": "Tiny"},
                        "geometry": [{"lon": 0, "lat": 0}],
                    },
                ]
            }

            features_gdf = gpd.GeoDataFrame(
                {"name": ["Wolin"]},
                geometry=[_POLY],
                crs="EPSG:4326",
            )
            mock_from_features.return_value = features_gdf
            measured_gdf = features_gdf.copy()
            measured_gdf["area_km2"] = [265.0]
            mock_add_area.return_value = measured_gdf

            islands = get_polish_islands()

            assert len(islands) == 1

    def test_downloads_islands_empty(self) -> None:
        """An Overpass response with no elements produces an empty result."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_area_column"
            ) as mock_add_area,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)
            mock_query.return_value = {"elements": []}

            empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []})
            mock_from_features.return_value = empty_gdf
            mock_add_area.return_value = empty_gdf

            islands = get_polish_islands()

            assert len(islands) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishCoastalFeatures:
    """Tests for get_polish_coastal_features."""

    @staticmethod
    def _stub_cache_file(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` return a mock path with fixed ``exists()``."""
        cache_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cache_file.exists.return_value = exists
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_length(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame that already has ``length_km`` exposes that value."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Mierzeja Helska"], "length_km": [35.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )

        features = get_polish_coastal_features()

        assert features.iloc[0]["length_km"] == 35.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_length(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame lacking ``length_km`` still yields its single row."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Mierzeja Helska"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )

        features = get_polish_coastal_features()

        assert len(features) == 1

    def test_downloads_coastal_features(self) -> None:
        """With no cache file, a mixed Overpass payload yields two feature rows."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_length_column"
            ) as mock_add_length,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)

            mock_query.return_value = {
                "elements": [
                    # Peninsula (polygon type)
                    {
                        "type": "way",
                        "tags": {"name": "Hel", "natural": "peninsula"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 0},
                            {"lon": 1, "lat": 1},
                            {"lon": 0, "lat": 1},
                        ],
                    },
                    # Cliff (line type)
                    {
                        "type": "way",
                        "tags": {"name": "Klif Orłowski", "natural": "cliff"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 1},
                        ],
                    },
                    # Duplicate of the peninsula entry
                    {
                        "type": "way",
                        "tags": {"name": "Hel", "natural": "peninsula"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 0},
                            {"lon": 1, "lat": 1},
                            {"lon": 0, "lat": 1},
                        ],
                    },
                    # No name
                    {
                        "type": "way",
                        "tags": {"natural": "cliff"},
                        "geometry": [],
                    },
                    # No geometry key at all
                    {
                        "type": "node",
                        "tags": {"name": "X", "natural": "cliff"},
                    },
                ]
            }

            features_gdf = gpd.GeoDataFrame(
                {"name": ["Hel", "Klif Orłowski"]},
                geometry=[_POLY, _POLY],
                crs="EPSG:4326",
            )
            mock_from_features.return_value = features_gdf
            measured_gdf = features_gdf.copy()
            measured_gdf["length_km"] = [35.0, 5.0]
            mock_add_length.return_value = measured_gdf

            features = get_polish_coastal_features()

            assert len(features) == 2

    def test_downloads_coastal_features_empty(self) -> None:
        """An Overpass response with no elements produces an empty result."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_length_column"
            ) as mock_add_length,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)
            mock_query.return_value = {"elements": []}

            empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []})
            mock_from_features.return_value = empty_gdf
            mock_add_length.return_value = empty_gdf

            features = get_polish_coastal_features()

            assert len(features) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPolishUnescoSites:
    """Tests for get_polish_unesco_sites."""

    @staticmethod
    def _stub_cache_file(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` return a mock path with fixed ``exists()``."""
        cache_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cache_file.exists.return_value = exists
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """When the cache file exists, the frame from ``read_file`` is returned."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        cached_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_gdf

        sites = get_polish_unesco_sites()

        assert sites is cached_gdf

    @patch("python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_water._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_water._overpass_query")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_water.sys.stdout")
    def test_downloads_unesco_sites(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """With no cache file, the frame built by ``from_features`` is returned."""
        self._stub_cache_file(mock_cache_dir, exists=False)

        node_site = {
            "type": "node",
            "tags": {"name": "Kopalnia Soli Wieliczka"},
            "lon": 20.0,
            "lat": 50.0,
        }
        open_way = {
            "type": "way",
            "tags": {"name": "Auschwitz"},
            "geometry": [
                {"lon": 19, "lat": 50},
                {"lon": 19.1, "lat": 50},
                {"lon": 19.1, "lat": 50.1},
                {"lon": 19, "lat": 50.1},
            ],
        }
        closed_way = {
            "type": "way",
            "tags": {"name": "Zamość"},
            "geometry": [
                {"lon": 23, "lat": 50.7},
                {"lon": 23.1, "lat": 50.7},
                {"lon": 23.1, "lat": 50.8},
                {"lon": 23, "lat": 50.7},
            ],
        }
        short_way = {
            "type": "way",
            "tags": {"name": "TooShort"},
            "geometry": [
                {"lon": 19, "lat": 50},
                {"lon": 19.1, "lat": 50},
            ],
        }
        duplicate_node = {
            "type": "node",
            "tags": {"name": "Kopalnia Soli Wieliczka"},
            "lon": 20.0,
            "lat": 50.0,
        }
        mock_query.return_value = {
            "elements": [
                # Node type
                node_site,
                # Relation type
                _make_relation_element("Stare Miasto w Krakowie"),
                # Way type with enough coords
                open_way,
                # Way already closed
                closed_way,
                # Way too few coords
                short_way,
                # Duplicate
                duplicate_node,
                # No name
                {"type": "node", "tags": {}, "lon": 0, "lat": 0},
                # Unknown type
                {"type": "area", "tags": {"name": "Ignored"}},
                # Relation without outer rings
                _make_relation_element("NoOuter", include_outer=False),
                # Way without geometry key
                {"type": "way", "tags": {"name": "NoGeom"}},
            ]
        }

        built_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_gdf

        sites = get_polish_unesco_sites()

        assert sites is built_gdf
|
|
||||||
@ -1,424 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data._warsaw module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import LineString, Polygon
|
|
||||||
|
|
||||||
from python_pkg.geo_data._warsaw import (
|
|
||||||
_merge_bridge_segments,
|
|
||||||
get_vistula_river,
|
|
||||||
get_warsaw_boundary,
|
|
||||||
get_warsaw_bridges,
|
|
||||||
get_warsaw_districts,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawBoundary:
    """Tests for get_warsaw_boundary."""

    @staticmethod
    def _stub_cache_file(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` return a mock path with fixed ``exists()``."""
        cache_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cache_file.exists.return_value = exists
        return cache_file

    @staticmethod
    def _stub_districts_file(pkg_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``pkg_dir / a / b / c`` resolve to a mock path with fixed ``exists()``.

        Three chained ``/`` operations are mocked; the last one returns the
        districts path mock, which is also the return value of this helper.
        """
        districts_file = MagicMock()
        pkg_dir.__truediv__ = MagicMock(return_value=MagicMock())
        level_one = pkg_dir.__truediv__.return_value
        level_one.__truediv__ = MagicMock(return_value=MagicMock())
        level_two = level_one.__truediv__.return_value
        level_two.__truediv__ = MagicMock(return_value=districts_file)
        districts_file.exists.return_value = exists
        return districts_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """When the cache file exists, the frame from ``read_file`` is returned."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        cached_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_gdf

        boundary = get_warsaw_boundary()

        assert boundary is cached_gdf

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.to_file")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_from_districts_file_with_warszawa(
        self,
        mock_cache_dir: MagicMock,
        mock_pkg_dir: MagicMock,
        mock_read: MagicMock,
        mock_ensure: MagicMock,
        mock_to_file: MagicMock,
    ) -> None:
        """Districts file containing a "Warszawa" row yields a one-row boundary."""
        self._stub_cache_file(mock_cache_dir, exists=False)
        self._stub_districts_file(mock_pkg_dir, exists=True)

        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Warszawa", "Mokotów"]},
            geometry=[
                Polygon([(20, 52), (21, 52), (21, 53), (20, 53)]),
                Polygon([(20.5, 52.5), (20.6, 52.5), (20.6, 52.6), (20.5, 52.6)]),
            ],
            crs="EPSG:4326",
        )

        boundary = get_warsaw_boundary()

        assert len(boundary) == 1

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.to_file")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_from_districts_file_no_warszawa_entry(
        self,
        mock_cache_dir: MagicMock,
        mock_pkg_dir: MagicMock,
        mock_read: MagicMock,
        mock_ensure: MagicMock,
        mock_to_file: MagicMock,
    ) -> None:
        """Districts file without a "Warszawa" row still yields a one-row boundary."""
        self._stub_cache_file(mock_cache_dir, exists=False)
        self._stub_districts_file(mock_pkg_dir, exists=True)

        # No "Warszawa" entry
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Mokotów", "Śródmieście"]},
            geometry=[
                Polygon([(20, 52), (21, 52), (21, 53), (20, 53)]),
                Polygon([(20.5, 52.5), (20.6, 52.5), (20.6, 52.6), (20.5, 52.6)]),
            ],
            crs="EPSG:4326",
        )

        boundary = get_warsaw_boundary()

        assert len(boundary) == 1

    def test_fallback_overpass(self) -> None:
        """Without cache or districts file, the Overpass-built frame is returned."""
        with (
            patch("python_pkg.geo_data._warsaw.sys.stdout"),
            patch("python_pkg.geo_data._warsaw.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._warsaw._PKG_DIR") as mock_pkg_dir,
            patch("python_pkg.geo_data._warsaw._overpass_query") as mock_query,
            patch("python_pkg.geo_data._warsaw._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
        ):
            self._stub_cache_file(mock_cache_dir, exists=False)
            self._stub_districts_file(mock_pkg_dir, exists=False)

            mock_query.return_value = {
                "elements": [
                    {
                        "type": "relation",
                        "members": [
                            {
                                "role": "outer",
                                "geometry": [
                                    {"lon": 20, "lat": 52},
                                    {"lon": 21, "lat": 52},
                                    {"lon": 21, "lat": 53},
                                ],
                            },
                            # non-outer member
                            {
                                "role": "inner",
                                "geometry": [
                                    {"lon": 20.5, "lat": 52.5},
                                ],
                            },
                        ],
                    },
                    # Not a relation
                    {"type": "way"},
                    # Relation with no outer geometry (empty coords)
                    {
                        "type": "relation",
                        "members": [
                            {"role": "inner", "geometry": [{"lon": 20, "lat": 52}]},
                        ],
                    },
                ]
            }

            built_gdf = MagicMock(spec=gpd.GeoDataFrame)
            mock_from_features.return_value = built_gdf

            boundary = get_warsaw_boundary()

            assert boundary is built_gdf
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawDistricts:
    """Tests for get_warsaw_districts."""

    @staticmethod
    def _stub_districts_file(pkg_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``pkg_dir / a / b / c`` resolve to a mock path with fixed ``exists()``.

        Three chained ``/`` operations are mocked; the last one returns the
        districts path mock, which is also the return value of this helper.
        """
        districts_file = MagicMock()
        pkg_dir.__truediv__ = MagicMock(return_value=MagicMock())
        level_one = pkg_dir.__truediv__.return_value
        level_one.__truediv__ = MagicMock(return_value=MagicMock())
        level_two = level_one.__truediv__.return_value
        level_two.__truediv__ = MagicMock(return_value=districts_file)
        districts_file.exists.return_value = exists
        return districts_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    def test_districts_file_exists(
        self, mock_pkg_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """The "Warszawa" row is absent from the returned districts."""
        self._stub_districts_file(mock_pkg_dir, exists=True)

        unit_square = [(0, 0), (1, 0), (1, 1), (0, 1)]
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Warszawa", "Mokotów", "Śródmieście"]},
            geometry=[
                Polygon(unit_square),
                Polygon(unit_square),
                Polygon(unit_square),
            ],
            crs="EPSG:4326",
        )

        districts = get_warsaw_districts()

        assert "Warszawa" not in districts["name"].values

    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    def test_districts_file_not_found(self, mock_pkg_dir: MagicMock) -> None:
        """A missing districts file raises ``FileNotFoundError``."""
        self._stub_districts_file(mock_pkg_dir, exists=False)

        import pytest

        with pytest.raises(FileNotFoundError, match="Warsaw districts GeoJSON"):
            get_warsaw_districts()
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetVistulaRiver:
    """Tests for get_vistula_river."""

    @staticmethod
    def _stub_cache_file(cache_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` return a mock path with fixed ``exists()``."""
        cache_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cache_file.exists.return_value = exists
        return cache_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """When the cache file exists, the frame from ``read_file`` is returned."""
        self._stub_cache_file(mock_cache_dir, exists=True)
        cached_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_gdf

        river = get_vistula_river()

        assert river is cached_gdf

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw._overpass_query")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw.sys.stdout")
    def test_downloads(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """With no cache file, the frame built by ``from_features`` is returned."""
        self._stub_cache_file(mock_cache_dir, exists=False)

        usable_way = {
            "type": "way",
            "geometry": [
                {"lon": 20.0, "lat": 52.0},
                {"lon": 21.0, "lat": 52.5},
            ],
        }
        single_point_way = {
            "type": "way",
            "geometry": [{"lon": 20.0, "lat": 52.0}],
        }
        mock_query.return_value = {
            "elements": [
                usable_way,
                # Too few coords
                single_point_way,
                # Not a way
                {"type": "node"},
                # Way without geometry
                {"type": "way"},
            ]
        }

        built_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_gdf

        river = get_vistula_river()

        assert river is built_gdf
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawBridges:
|
|
||||||
"""Tests for get_warsaw_bridges."""
|
|
||||||
|
|
||||||
@patch("python_pkg.geo_data._warsaw.gpd.read_file")
@patch("python_pkg.geo_data._warsaw.CACHE_DIR")
def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
    """When the cache file exists, the frame from ``read_file`` is returned."""
    cache_file = MagicMock()
    cache_file.exists.return_value = True
    mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

    cached_gdf = MagicMock(spec=gpd.GeoDataFrame)
    mock_read.return_value = cached_gdf

    bridges = get_warsaw_bridges()

    assert bridges is cached_gdf
|
|
||||||
|
|
||||||
def test_downloads(self) -> None:
|
|
||||||
with (
|
|
||||||
patch("python_pkg.geo_data._warsaw.sys.stdout"),
|
|
||||||
patch("python_pkg.geo_data._warsaw.CACHE_DIR") as mock_cache_dir,
|
|
||||||
patch("python_pkg.geo_data._warsaw.get_vistula_river") as mock_vistula,
|
|
||||||
patch("python_pkg.geo_data._warsaw._overpass_query") as mock_query,
|
|
||||||
patch("python_pkg.geo_data._warsaw._ensure_cache_dir"),
|
|
||||||
patch(
|
|
||||||
"python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features"
|
|
||||||
) as mock_from_features,
|
|
||||||
):
|
|
||||||
mock_path = MagicMock()
|
|
||||||
mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
|
|
||||||
mock_path.exists.return_value = False
|
|
||||||
|
|
||||||
# Create a real Vistula geometry for intersection tests
|
|
||||||
vistula_gdf = gpd.GeoDataFrame(
|
|
||||||
{"name": ["Wisła"]},
|
|
||||||
geometry=[LineString([(20.0, 52.2), (21.0, 52.2)])],
|
|
||||||
crs="EPSG:4326",
|
|
||||||
)
|
|
||||||
mock_vistula.return_value = vistula_gdf
|
|
||||||
|
|
||||||
mock_query.return_value = {
|
|
||||||
"elements": [
|
|
||||||
# Bridge that intersects vistula buffer
|
|
||||||
{
|
|
||||||
"type": "way",
|
|
||||||
"id": 1,
|
|
||||||
"tags": {"name": "Most Łazienkowski"},
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 20.5, "lat": 52.19},
|
|
||||||
{"lon": 20.5, "lat": 52.21},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
# Bridge far from vistula
|
|
||||||
{
|
|
||||||
"type": "way",
|
|
||||||
"id": 2,
|
|
||||||
"tags": {"name": "Most Daleki"},
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 20.5, "lat": 55.0},
|
|
||||||
{"lon": 20.5, "lat": 55.1},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
# Not a way
|
|
||||||
{"type": "node", "tags": {"name": "Most X"}},
|
|
||||||
# Way without geometry
|
|
||||||
{"type": "way", "tags": {"name": "Most Y"}},
|
|
||||||
# No name
|
|
||||||
{
|
|
||||||
"type": "way",
|
|
||||||
"id": 3,
|
|
||||||
"tags": {},
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 20.5, "lat": 52.19},
|
|
||||||
{"lon": 20.5, "lat": 52.21},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
# Duplicate
|
|
||||||
{
|
|
||||||
"type": "way",
|
|
||||||
"id": 4,
|
|
||||||
"tags": {"name": "Most Łazienkowski"},
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 20.5, "lat": 52.19},
|
|
||||||
{"lon": 20.5, "lat": 52.21},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
# Too few coords
|
|
||||||
{
|
|
||||||
"type": "way",
|
|
||||||
"id": 5,
|
|
||||||
"tags": {"name": "Most Short"},
|
|
||||||
"geometry": [{"lon": 20.5, "lat": 52.19}],
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
|
|
||||||
mock_from_features.return_value = mock_gdf
|
|
||||||
|
|
||||||
result = get_warsaw_bridges()
|
|
||||||
assert result is mock_gdf
|
|
||||||
|
|
||||||
|
|
||||||
class TestMergeBridgeSegments:
    """Checks on the geometry type reported by _merge_bridge_segments."""

    def test_single_segment(self) -> None:
        """A lone feature comes back as exactly one LineString entry."""
        only_segment: dict[str, Any] = {
            "properties": {"name": "Most A"},
            "geometry": {"coordinates": [(20, 52), (21, 52)]},
        }
        merged = _merge_bridge_segments([only_segment])
        assert len(merged) == 1
        assert merged[0]["geometry"]["type"] == "LineString"

    def test_multiple_segments_same_name(self) -> None:
        """Two features sharing a name collapse into one MultiLineString entry."""
        # Two end-to-end segments, both named "Most A".
        segments: list[dict[str, Any]] = [
            {"properties": {"name": "Most A"}, "geometry": {"coordinates": coords}}
            for coords in ([(20, 52), (21, 52)], [(21, 52), (22, 52)])
        ]
        merged = _merge_bridge_segments(segments)
        assert len(merged) == 1
        assert merged[0]["geometry"]["type"] == "MultiLineString"
|
|
||||||
@ -1,176 +0,0 @@
|
|||||||
"""Tests for metro stations and osiedla download paths."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
|
|
||||||
from python_pkg.geo_data._warsaw import (
|
|
||||||
get_warsaw_metro_stations,
|
|
||||||
get_warsaw_osiedla,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
|
|
||||||
"""Create a mock OSM relation element."""
|
|
||||||
members = []
|
|
||||||
if include_outer:
|
|
||||||
members.append(
|
|
||||||
{
|
|
||||||
"role": "outer",
|
|
||||||
"geometry": [
|
|
||||||
{"lon": 0, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 0},
|
|
||||||
{"lon": 1, "lat": 1},
|
|
||||||
{"lon": 0, "lat": 1},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return {"type": "relation", "tags": {"name": name}, "members": members}
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawMetroStations:
    """Tests for get_warsaw_metro_stations."""

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """Expect the object from the patched gpd.read_file when the cache path exists."""
        # @patch decorators inject bottom-up: CACHE_DIR mock first, read_file second.
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression in the code under test yields mock_path.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = mock_gdf
        result = get_warsaw_metro_stations()
        assert result is mock_gdf

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw._overpass_query")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw.sys.stdout")
    def test_downloads_metro(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Run with the cache reported missing and the Overpass query mocked.

        Mock parameters follow the bottom-up order of the @patch stack
        (sys.stdout first, from_features last); mock_stdout and mock_ensure
        are accepted only because the decorators inject them. The only
        assertion is that the function returns the object produced by the
        patched GeoDataFrame.from_features.
        """
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression yields mock_path, which reports
        # that no cached file exists.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = False

        mock_query.return_value = {
            "elements": [
                # M1 only station
                {
                    "type": "node",
                    "tags": {"name": "Kabaty"},
                    "lon": 21.0,
                    "lat": 52.1,
                },
                # M2 only station
                {
                    "type": "node",
                    "tags": {"name": "Bródno"},
                    "lon": 21.0,
                    "lat": 52.3,
                },
                # M1/M2 interchange
                {
                    "type": "node",
                    "tags": {"name": "Świętokrzyska"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # Unknown station
                {
                    "type": "node",
                    "tags": {"name": "Nowa Stacja"},
                    "lon": 21.0,
                    "lat": 52.4,
                },
                # Not a node -> skip
                {
                    "type": "way",
                    "tags": {"name": "Metro Line"},
                },
                # Node without name -> skip
                {
                    "type": "node",
                    "tags": {},
                    "lon": 21.0,
                    "lat": 52.0,
                },
                # Duplicate
                {
                    "type": "node",
                    "tags": {"name": "Kabaty"},
                    "lon": 21.0,
                    "lat": 52.1,
                },
            ]
        }

        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = mock_gdf

        result = get_warsaw_metro_stations()
        assert result is mock_gdf
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawOsiedla:
    """Tests for get_warsaw_osiedla."""

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """Expect the object from the patched gpd.read_file when the cache path exists."""
        # @patch decorators inject bottom-up: CACHE_DIR mock first, read_file second.
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression in the code under test yields mock_path.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = mock_gdf
        result = get_warsaw_osiedla()
        assert result is mock_gdf

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw._overpass_query")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw.sys.stdout")
    def test_downloads_osiedla(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Run with the cache reported missing and the Overpass query mocked.

        Mock parameters follow the bottom-up order of the @patch stack
        (sys.stdout first, from_features last); mock_stdout and mock_ensure
        are accepted only because the decorators inject them. The only
        assertion is that the function returns the object produced by the
        patched GeoDataFrame.from_features.
        """
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression yields mock_path, which reports
        # that no cached file exists.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = False

        mock_query.return_value = {
            "elements": [
                # Named relation with one outer ring.
                _make_relation_element("Mokotów"),
                # Not a relation -> skip
                {
                    "type": "way",
                    "tags": {"name": "Way Osiedle"},
                },
                # No name
                {"type": "relation", "tags": {}, "members": []},
                # Duplicate
                _make_relation_element("Mokotów"),
                # No outer rings
                _make_relation_element("Empty", include_outer=False),
            ]
        }

        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = mock_gdf

        result = get_warsaw_osiedla()
        assert result is mock_gdf
|
|
||||||
@ -1,269 +0,0 @@
|
|||||||
"""Tests for python_pkg.geo_data._warsaw_places module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
from shapely.geometry import LineString
|
|
||||||
|
|
||||||
from python_pkg.geo_data._warsaw_places import (
|
|
||||||
_filter_streets_by_length,
|
|
||||||
get_warsaw_landmarks,
|
|
||||||
get_warsaw_streets,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawStreets:
    """Tests for get_warsaw_streets."""

    @patch("python_pkg.geo_data._warsaw_places._filter_streets_by_length")
    @patch("python_pkg.geo_data._warsaw_places.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    def test_cached(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
        mock_filter: MagicMock,
    ) -> None:
        """Expect the shared mock frame back when the cache path exists.

        Both gpd.read_file and _filter_streets_by_length are patched to return
        the same mock, so the assertion holds whichever of them supplies the
        final result.
        """
        # @patch decorators inject bottom-up: CACHE_DIR, read_file, then the filter.
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression in the code under test yields mock_path.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True

        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = mock_gdf
        mock_filter.return_value = mock_gdf

        result = get_warsaw_streets()
        assert result is mock_gdf

    def test_downloads(self) -> None:
        """Run with the cache reported missing and the Overpass query mocked.

        from_features and _filter_streets_by_length are both patched to return
        the same mock frame, which is what the function is expected to return.
        """
        with (
            patch("python_pkg.geo_data._warsaw_places.sys.stdout"),
            patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._warsaw_places._overpass_query") as mock_query,
            patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._warsaw_places.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._warsaw_places._filter_streets_by_length"
            ) as mock_filter,
        ):
            mock_path = MagicMock()
            # Any `CACHE_DIR / ...` expression yields mock_path, which reports
            # that no cached file exists.
            mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
            mock_path.exists.return_value = False

            mock_query.return_value = {
                "elements": [
                    # Named way with two coordinates.
                    {
                        "type": "way",
                        "tags": {"name": "Marszałkowska", "highway": "primary"},
                        "geometry": [
                            {"lon": 21.0, "lat": 52.2},
                            {"lon": 21.0, "lat": 52.3},
                        ],
                    },
                    # Too few coords
                    {
                        "type": "way",
                        "tags": {"name": "Short"},
                        "geometry": [{"lon": 21.0, "lat": 52.2}],
                    },
                    # Not a way
                    {"type": "node", "tags": {"name": "Node"}},
                    # Way without geometry
                    {"type": "way", "tags": {"name": "NoGeom"}},
                ]
            }

            mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
            mock_from_features.return_value = mock_gdf
            mock_filter.return_value = mock_gdf

            result = get_warsaw_streets()
            assert result is mock_gdf
|
|
||||||
|
|
||||||
|
|
||||||
class TestFilterStreetsByLength:
    """Behavioural checks for _filter_streets_by_length."""

    @staticmethod
    def _wgs84_frame(columns: dict[str, list]) -> gpd.GeoDataFrame:
        """Wrap the given columns in a GeoDataFrame tagged EPSG:4326."""
        return gpd.GeoDataFrame(columns, crs="EPSG:4326")

    def test_filters_and_merges(self) -> None:
        """Every row returned for a 500 threshold reports length_m >= 500."""
        streets = self._wgs84_frame(
            {
                "name": ["Marszałkowska", "Marszałkowska", "Unknown", "Short"],
                "geometry": [
                    LineString([(21.0, 52.2), (21.0, 52.3)]),
                    LineString([(21.0, 52.3), (21.0, 52.4)]),
                    LineString([(21.0, 52.2), (21.0, 52.3)]),
                    LineString([(21.0, 52.2), (21.001, 52.2001)]),
                ],
            }
        )
        filtered = _filter_streets_by_length(streets, 500)
        # Only streets >= 500m should be included
        assert all(row["length_m"] >= 500 for _, row in filtered.iterrows())

    def test_single_segment(self) -> None:
        """One named segment with a zero threshold yields exactly one row."""
        segment = LineString([(21.0, 52.2), (21.0, 52.3)])
        streets = self._wgs84_frame({"name": ["Marszałkowska"], "geometry": [segment]})
        filtered = _filter_streets_by_length(streets, 0)
        assert len(filtered) == 1

    def test_unknown_name_excluded(self) -> None:
        """A street named "Unknown" is dropped even with a zero threshold."""
        segment = LineString([(21.0, 52.2), (21.0, 52.3)])
        streets = self._wgs84_frame({"name": ["Unknown"], "geometry": [segment]})
        filtered = _filter_streets_by_length(streets, 0)
        assert len(filtered) == 0

    def test_empty_name_excluded(self) -> None:
        """A street with an empty name is dropped even with a zero threshold."""
        segment = LineString([(21.0, 52.2), (21.0, 52.3)])
        streets = self._wgs84_frame({"name": [""], "geometry": [segment]})
        filtered = _filter_streets_by_length(streets, 0)
        assert len(filtered) == 0

    def test_no_name_column(self) -> None:
        """A frame without a name column produces an empty result."""
        segment = LineString([(21.0, 52.2), (21.0, 52.3)])
        streets = self._wgs84_frame({"geometry": [segment]})
        filtered = _filter_streets_by_length(streets, 0)
        assert len(filtered) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetWarsawLandmarks:
    """Tests for get_warsaw_landmarks."""

    @patch("python_pkg.geo_data._warsaw_places.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """Expect the object from the patched gpd.read_file when the cache path exists."""
        # @patch decorators inject bottom-up: CACHE_DIR mock first, read_file second.
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression in the code under test yields mock_path.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True

        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = mock_gdf

        result = get_warsaw_landmarks()
        assert result is mock_gdf

    @patch("python_pkg.geo_data._warsaw_places.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw_places._overpass_query")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw_places.sys.stdout")
    def test_downloads(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Run with the cache reported missing and the Overpass query mocked.

        Mock parameters follow the bottom-up order of the @patch stack
        (sys.stdout first, from_features last); mock_stdout and mock_ensure
        are accepted only because the decorators inject them. The only
        assertion is that the function returns the object produced by the
        patched GeoDataFrame.from_features.
        """
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression yields mock_path, which reports
        # that no cached file exists.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = False

        mock_query.return_value = {
            "elements": [
                # Node with tourism
                {
                    "type": "node",
                    "tags": {"name": "Muzeum Chopina", "tourism": "museum"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # Way with center
                {
                    "type": "way",
                    "tags": {"name": "Łazienki", "tourism": "attraction"},
                    "center": {"lon": 21.0, "lat": 52.2},
                },
                # Node with historic
                {
                    "type": "node",
                    "tags": {"name": "Kolumna Zygmunta", "historic": "monument"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # Node with leisure
                {
                    "type": "node",
                    "tags": {"name": "Park Skaryszewski", "leisure": "park"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # Node no tourism/historic/leisure -> "landmark"
                {
                    "type": "node",
                    "tags": {"name": "Generic"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # Duplicate
                {
                    "type": "node",
                    "tags": {"name": "Muzeum Chopina", "tourism": "museum"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # No name
                {
                    "type": "node",
                    "tags": {"tourism": "museum"},
                    "lon": 21.0,
                    "lat": 52.2,
                },
                # Way without center
                {
                    "type": "way",
                    "tags": {"name": "No Center"},
                },
            ]
        }

        mock_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = mock_gdf

        result = get_warsaw_landmarks()
        assert result is mock_gdf

    @patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw_places._overpass_query")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw_places.sys.stdout")
    def test_empty_result(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """Expect a zero-length result when the Overpass query returns no elements.

        GeoDataFrame.from_features is deliberately not patched here, so the
        returned object is whatever the real code builds for an empty response.
        """
        mock_path = MagicMock()
        # Any `CACHE_DIR / ...` expression yields mock_path, which reports
        # that no cached file exists.
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = False

        mock_query.return_value = {"elements": []}

        result = get_warsaw_landmarks()
        assert len(result) == 0
|
|
||||||
163
python_pkg/pdfCentered/.gitignore
vendored
163
python_pkg/pdfCentered/.gitignore
vendored
@ -1,163 +0,0 @@
|
|||||||
# Byte-compiled / optimized / DLL files
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
|
|
||||||
# C extensions
|
|
||||||
*.so
|
|
||||||
|
|
||||||
# Distribution / packaging
|
|
||||||
.Python
|
|
||||||
build/
|
|
||||||
develop-eggs/
|
|
||||||
dist/
|
|
||||||
downloads/
|
|
||||||
eggs/
|
|
||||||
.eggs/
|
|
||||||
lib/
|
|
||||||
lib64/
|
|
||||||
parts/
|
|
||||||
sdist/
|
|
||||||
var/
|
|
||||||
wheels/
|
|
||||||
share/python-wheels/
|
|
||||||
*.egg-info/
|
|
||||||
.installed.cfg
|
|
||||||
*.egg
|
|
||||||
MANIFEST
|
|
||||||
|
|
||||||
# PyInstaller
|
|
||||||
# Usually these files are written by a python script from a template
|
|
||||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
||||||
*.manifest
|
|
||||||
*.spec
|
|
||||||
|
|
||||||
# Installer logs
|
|
||||||
pip-log.txt
|
|
||||||
pip-delete-this-directory.txt
|
|
||||||
|
|
||||||
# Unit test / coverage reports
|
|
||||||
htmlcov/
|
|
||||||
.tox/
|
|
||||||
.nox/
|
|
||||||
.coverage
|
|
||||||
.coverage.*
|
|
||||||
.cache
|
|
||||||
nosetests.xml
|
|
||||||
coverage.xml
|
|
||||||
*.cover
|
|
||||||
*.py,cover
|
|
||||||
.hypothesis/
|
|
||||||
.pytest_cache/
|
|
||||||
cover/
|
|
||||||
|
|
||||||
# Translations
|
|
||||||
*.mo
|
|
||||||
*.pot
|
|
||||||
|
|
||||||
# Django stuff:
|
|
||||||
*.log
|
|
||||||
local_settings.py
|
|
||||||
db.sqlite3
|
|
||||||
db.sqlite3-journal
|
|
||||||
|
|
||||||
# Flask stuff:
|
|
||||||
instance/
|
|
||||||
.webassets-cache
|
|
||||||
|
|
||||||
# Scrapy stuff:
|
|
||||||
.scrapy
|
|
||||||
|
|
||||||
# Sphinx documentation
|
|
||||||
docs/_build/
|
|
||||||
|
|
||||||
# PyBuilder
|
|
||||||
.pybuilder/
|
|
||||||
target/
|
|
||||||
|
|
||||||
# Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
# IPython
|
|
||||||
profile_default/
|
|
||||||
ipython_config.py
|
|
||||||
|
|
||||||
# pyenv
|
|
||||||
# For a library or package, you might want to ignore these files since the code is
|
|
||||||
# intended to run in multiple environments; otherwise, check them in:
|
|
||||||
# .python-version
|
|
||||||
|
|
||||||
# pipenv
|
|
||||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
||||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
||||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
||||||
# install all needed dependencies.
|
|
||||||
#Pipfile.lock
|
|
||||||
|
|
||||||
# poetry
|
|
||||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
||||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
||||||
# commonly ignored for libraries.
|
|
||||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
||||||
#poetry.lock
|
|
||||||
|
|
||||||
# pdm
|
|
||||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
||||||
#pdm.lock
|
|
||||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
||||||
# in version control.
|
|
||||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
||||||
.pdm.toml
|
|
||||||
.pdm-python
|
|
||||||
.pdm-build/
|
|
||||||
|
|
||||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
||||||
__pypackages__/
|
|
||||||
|
|
||||||
# Celery stuff
|
|
||||||
celerybeat-schedule
|
|
||||||
celerybeat.pid
|
|
||||||
|
|
||||||
# SageMath parsed files
|
|
||||||
*.sage.py
|
|
||||||
|
|
||||||
# Environments
|
|
||||||
.env
|
|
||||||
.venv
|
|
||||||
env/
|
|
||||||
venv/
|
|
||||||
ENV/
|
|
||||||
env.bak/
|
|
||||||
venv.bak/
|
|
||||||
|
|
||||||
# Spyder project settings
|
|
||||||
.spyderproject
|
|
||||||
.spyproject
|
|
||||||
|
|
||||||
# Rope project settings
|
|
||||||
.ropeproject
|
|
||||||
|
|
||||||
# mkdocs documentation
|
|
||||||
/site
|
|
||||||
|
|
||||||
# mypy
|
|
||||||
.mypy_cache/
|
|
||||||
.dmypy.json
|
|
||||||
dmypy.json
|
|
||||||
|
|
||||||
# Pyre type checker
|
|
||||||
.pyre/
|
|
||||||
|
|
||||||
# pytype static type analyzer
|
|
||||||
.pytype/
|
|
||||||
|
|
||||||
# Cython debug symbols
|
|
||||||
cython_debug/
|
|
||||||
|
|
||||||
# PyCharm
|
|
||||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
||||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
||||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
||||||
#.idea/
|
|
||||||
*.pdf
|
|
||||||
@ -1 +0,0 @@
|
|||||||
3.9.0
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
|
|
||||||
[pytest]
|
|
||||||
filterwarnings =
|
|
||||||
ignore::DeprecationWarning
|
|
||||||
@ -1 +0,0 @@
|
|||||||
"""Symmetric splitting utilities package."""
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# Run split_x_into_n_symmetrically.py (located next to this script) with the
# interpreter from the .venv two directories up, forwarding all arguments.
set -e
here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
"${here}/../../.venv/bin/python" "${here}/split_x_into_n_symmetrically.py" "$@"
|
|
||||||
@ -1,60 +0,0 @@
|
|||||||
"""Distribute values symmetrically across N parts."""
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_symmetric_weights(
|
|
||||||
n: int, middle_weight: float, factors: list[float] | None = None
|
|
||||||
) -> list[float]:
|
|
||||||
"""Calculate symmetric weights for both even and odd N.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
n: Number of parts to split into.
|
|
||||||
middle_weight: The middle value for symmetry.
|
|
||||||
factors: If provided, controls the difference in weights.
|
|
||||||
Must have length n // 2 or n // 2 - 1 depending on n.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of symmetric weights.
|
|
||||||
"""
|
|
||||||
half_n = n // 2
|
|
||||||
weights_left: list[float] = [middle_weight]
|
|
||||||
|
|
||||||
if factors:
|
|
||||||
for factor in factors:
|
|
||||||
next_weight = weights_left[-1] + factor
|
|
||||||
weights_left.append(next_weight)
|
|
||||||
else:
|
|
||||||
weights_left.extend(middle_weight - (idx + 1) for idx in range(half_n - 1))
|
|
||||||
|
|
||||||
if not n % 2:
|
|
||||||
weights = weights_left[::-1] + weights_left
|
|
||||||
else:
|
|
||||||
weights = [*weights_left[::-1], middle_weight, *weights_left]
|
|
||||||
|
|
||||||
return weights
|
|
||||||
|
|
||||||
|
|
||||||
def scale_to_total(x: float, weights: list[float]) -> list[float]:
    """Rescale ``weights`` proportionally so that they add up to ``x``.

    Args:
        x: Total value to distribute.
        weights: Relative weights; their sum must be non-zero.

    Returns:
        One value per weight, each equal to ``(x / sum(weights)) * weight``.

    Raises:
        ZeroDivisionError: If the weights sum to zero (including an empty list).
    """
    # Value represented by one unit of weight.
    per_unit = x / sum(weights)
    scaled: list[float] = []
    for share in weights:
        scaled.append(per_unit * share)
    return scaled
|
|
||||||
|
|
||||||
|
|
||||||
def split_x_into_n_symmetrically(x: float, n: int, factors: list[float]) -> list[float]:
    """Split ``x`` using mirrored weights grown from a centre weight of 1.

    The weights come from ``calculate_symmetric_weights`` with ``factors`` as
    the outward steps and are then rescaled by ``scale_to_total`` to sum to ``x``.
    """
    return scale_to_total(
        x, calculate_symmetric_weights(n, middle_weight=1, factors=factors)
    )
|
|
||||||
|
|
||||||
|
|
||||||
def split_x_into_n_middle(x: float, n: int, middle_value: float) -> list[float]:
    """Split ``x`` using mirrored weights that peak at ``middle_value``.

    The weights come from ``calculate_symmetric_weights`` without custom
    factors and are then rescaled by ``scale_to_total`` to sum to ``x``.
    """
    return scale_to_total(
        x, calculate_symmetric_weights(n, middle_weight=middle_value)
    )
|
|
||||||
@ -1 +0,0 @@
|
|||||||
"""Tests for split module."""
|
|
||||||
@ -1,118 +0,0 @@
|
|||||||
"""Unit tests for split_x_into_n_symmetrically module."""
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from python_pkg.split.split_x_into_n_symmetrically import (
|
|
||||||
calculate_symmetric_weights,
|
|
||||||
scale_to_total,
|
|
||||||
split_x_into_n_middle,
|
|
||||||
split_x_into_n_symmetrically,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestCalculateSymmetricWeights:
    """Checks the length and mirror symmetry of calculate_symmetric_weights output."""

    def test_odd_n_without_factors(self) -> None:
        """Odd n without factors gives n weights mirrored around the centre."""
        outcome = calculate_symmetric_weights(5, 3)
        assert len(outcome) == 5
        # The first two entries mirror the last two.
        assert outcome[0] == outcome[-1]
        assert outcome[1] == outcome[-2]

    def test_even_n_without_factors(self) -> None:
        """Even n without factors gives n mirrored weights."""
        outcome = calculate_symmetric_weights(4, 2)
        assert len(outcome) == 4
        # The first two entries mirror the last two.
        assert outcome[0] == outcome[-1]
        assert outcome[1] == outcome[-2]

    def test_with_factors(self) -> None:
        """With two factors the length is 6, even though n is 4."""
        outcome = calculate_symmetric_weights(4, 1, [0.5, 0.3])
        # Length follows the factors: (len(factors) + 1) entries per half.
        assert len(outcome) == 6
        assert outcome[0] == outcome[-1]
        assert outcome[1] == outcome[-2]

    def test_n_equals_1(self) -> None:
        """n=1 yields three copies of the middle weight (current behaviour)."""
        # Odd branch with a one-element half: reversed half + middle + half.
        expected = [5, 5, 5]
        assert calculate_symmetric_weights(1, 5) == expected

    def test_n_equals_2(self) -> None:
        """n=2 yields two equal weights."""
        outcome = calculate_symmetric_weights(2, 3)
        assert len(outcome) == 2
        first, second = outcome
        assert first == second
|
|
||||||
|
|
||||||
|
|
||||||
class TestScaleToTotal:
    """Checks that scale_to_total hits the requested sum and keeps proportions."""

    def test_scale_to_total_basic(self) -> None:
        """Scaled values add up to the requested total."""
        total = sum(scale_to_total(100, [1.0, 2.0, 1.0]))
        assert total == pytest.approx(100)

    def test_scale_preserves_proportions(self) -> None:
        """Weights 1:2:3 scaled to 60 give 10, 20 and 30."""
        # The weights sum to 6, so one unit of weight is worth 10.
        low, mid, high = scale_to_total(60, [1.0, 2.0, 3.0])
        assert low == pytest.approx(10)
        assert mid == pytest.approx(20)
        assert high == pytest.approx(30)

    def test_scale_with_floats(self) -> None:
        """Fractional weights still scale to the requested total."""
        total = sum(scale_to_total(10, [0.5, 1.0, 0.5]))
        assert total == pytest.approx(10)
|
|
||||||
|
|
||||||
|
|
||||||
class TestSplitXIntoNSymmetrically:
    """Checks the total, length and symmetry of split_x_into_n_symmetrically."""

    def test_split_basic(self) -> None:
        """Two factors give six mirrored parts summing to x."""
        parts = split_x_into_n_symmetrically(100, 4, [0.5, 0.2])
        # Length follows the factors, not n.
        assert len(parts) == 6
        assert sum(parts) == pytest.approx(100)
        # The outermost and next-to-outermost parts mirror each other.
        outer, inner = parts[0], parts[1]
        assert outer == pytest.approx(parts[-1])
        assert inner == pytest.approx(parts[-2])

    def test_split_preserves_total(self) -> None:
        """The parts always add back up to x."""
        parts = split_x_into_n_symmetrically(1000, 5, [0.1, 0.2])
        assert sum(parts) == pytest.approx(1000)
|
|
||||||
|
|
||||||
|
|
||||||
class TestSplitXIntoNMiddle:
    """Checks the total, length and symmetry of split_x_into_n_middle."""

    def test_split_middle_basic(self) -> None:
        """Three parts are produced and they sum to x."""
        parts = split_x_into_n_middle(100, 3, 2)
        assert len(parts) == 3
        assert sum(parts) == pytest.approx(100)

    def test_split_middle_symmetric(self) -> None:
        """The first two parts mirror the last two."""
        parts = split_x_into_n_middle(100, 5, 3)
        outer, inner = parts[0], parts[1]
        assert outer == pytest.approx(parts[-1])
        assert inner == pytest.approx(parts[-2])

    def test_split_middle_even_parts(self) -> None:
        """An even part count gives that many parts summing to x."""
        parts = split_x_into_n_middle(50, 4, 1)
        assert len(parts) == 4
        assert sum(parts) == pytest.approx(50)
|
|
||||||
@ -1,191 +0,0 @@
|
|||||||
# ==============================================================================
|
|
||||||
# Python Development Dependencies - Linting, Formatting, and Testing
|
|
||||||
# ==============================================================================
|
|
||||||
# Install with: pip install -r requirements-dev.txt
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Include base requirements
|
|
||||||
-r requirements.txt
|
|
||||||
|
|
||||||
# add-trailing-comma - Add trailing commas
|
|
||||||
add-trailing-comma>=3.1.0
|
|
||||||
|
|
||||||
# autoflake - Remove unused imports and variables
|
|
||||||
autoflake>=2.2.0
|
|
||||||
|
|
||||||
# autopep8 - PEP 8 formatting (alternative formatter)
|
|
||||||
autopep8>=2.0.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# SECURITY LINTERS
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Bandit - Security linter
|
|
||||||
bandit>=1.7.0
|
|
||||||
|
|
||||||
# Black - The uncompromising code formatter (fallback/comparison)
|
|
||||||
black>=24.0.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# SPELL CHECKING
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# codespell - Fix common misspellings
|
|
||||||
codespell>=2.2.0
|
|
||||||
|
|
||||||
# Coverage.py - Code coverage measurement
|
|
||||||
coverage>=7.4.0
|
|
||||||
|
|
||||||
# darglint - Check docstrings match function signatures
|
|
||||||
darglint>=1.8.0
|
|
||||||
|
|
||||||
# dead - Find dead code
|
|
||||||
dead>=1.5.0
|
|
||||||
|
|
||||||
# docformatter - Formats docstrings
|
|
||||||
docformatter>=1.7.0
|
|
||||||
|
|
||||||
# fixit - Auto-fix linting errors
|
|
||||||
fixit>=2.1.0
|
|
||||||
|
|
||||||
# Flake8 - Linting tool (wraps pyflakes, pycodestyle, mccabe)
|
|
||||||
flake8>=7.0.0
|
|
||||||
flake8-annotations>=3.0.0 # Type annotation checks
|
|
||||||
flake8-bandit>=4.1.0 # Security checks via bandit
|
|
||||||
|
|
||||||
# Flake8 plugins for maximum coverage
|
|
||||||
flake8-bugbear>=24.0.0 # Additional bug detection
|
|
||||||
flake8-comprehensions>=3.14.0 # Better list/dict/set comprehensions
|
|
||||||
flake8-docstrings>=1.7.0 # Docstring checks
|
|
||||||
flake8-eradicate>=1.5.0 # Dead code detection
|
|
||||||
flake8-pie>=0.16.0 # Miscellaneous lints
|
|
||||||
flake8-print>=5.0.0 # Detect print statements
|
|
||||||
flake8-pyi>=24.0.0 # Type stub file checks
|
|
||||||
flake8-pytest-style>=2.0.0 # Pytest style checks
|
|
||||||
flake8-return>=1.2.0 # Better return statement checks
|
|
||||||
flake8-simplify>=0.21.0 # Simplification suggestions
|
|
||||||
|
|
||||||
# Hypothesis - Property-based testing
|
|
||||||
hypothesis>=6.98.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# IMPORT CHECKING
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# importlib-metadata for import analysis
|
|
||||||
importlib-metadata>=7.0.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# DOCUMENTATION
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# NOTE: pep257 (the legacy PEP 257 docstring checker) is not listed here; use pydocstyle instead.
|
|
||||||
# interrogate - Check docstring coverage
|
|
||||||
interrogate>=1.5.0
|
|
||||||
|
|
||||||
# isort - Import sorting (ruff handles this, but useful standalone)
|
|
||||||
isort>=5.13.0
|
|
||||||
|
|
||||||
# mccabe - McCabe complexity checker
|
|
||||||
mccabe>=0.7.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# TYPE CHECKING
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# MyPy - Static type checker
|
|
||||||
mypy>=1.8.0
|
|
||||||
|
|
||||||
# pip-audit - Audit Python packages for known vulnerabilities
|
|
||||||
pip-audit>=2.6.0
|
|
||||||
|
|
||||||
# pipdeptree - Show dependency tree
|
|
||||||
pipdeptree>=2.14.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# PRE-COMMIT
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# pre-commit - Git hook management
|
|
||||||
pre-commit>=3.6.0
|
|
||||||
|
|
||||||
# prospector - Python static analysis tool
|
|
||||||
prospector>=1.10.0
|
|
||||||
|
|
||||||
# pycodestyle - Python style guide checker (PEP 8)
|
|
||||||
pycodestyle>=2.11.0
|
|
||||||
|
|
||||||
# pydocstyle - Docstring style checker (PEP 257)
|
|
||||||
pydocstyle>=6.3.0
|
|
||||||
|
|
||||||
# pyflakes - Passive checker of Python programs
|
|
||||||
pyflakes>=3.2.0
|
|
||||||
|
|
||||||
# pylama - Code audit tool (wraps multiple linters)
|
|
||||||
pylama>=8.4.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# LINTERS
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Pylint - Comprehensive Python linter
|
|
||||||
pylint>=3.0.0
|
|
||||||
|
|
||||||
# Pyright - Microsoft's type checker (very strict)
|
|
||||||
pyright>=1.1.350
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# TESTING
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# pytest - Testing framework
|
|
||||||
pytest>=8.0.0
|
|
||||||
|
|
||||||
# pytest plugins
|
|
||||||
pytest-cov>=4.1.0 # Coverage plugin
|
|
||||||
pytest-randomly>=3.15.0 # Randomize test order
|
|
||||||
pytest-sugar>=1.0.0 # Better test output
|
|
||||||
pytest-timeout>=2.2.0 # Test timeouts
|
|
||||||
pytest-xdist>=3.5.0 # Parallel test execution
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# ADDITIONAL TOOLS
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# pyupgrade - Upgrade Python syntax
|
|
||||||
pyupgrade>=3.15.0
|
|
||||||
|
|
||||||
# Radon - Code metrics (complexity, maintainability)
|
|
||||||
radon>=6.0.0
|
|
||||||
|
|
||||||
# reorder-python-imports - Reorder imports
|
|
||||||
reorder-python-imports>=3.12.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# CODE FORMATTERS
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Ruff - Extremely fast Python linter and formatter (replaces many tools)
|
|
||||||
ruff>=0.8.0
|
|
||||||
|
|
||||||
# Safety - Check dependencies for known security vulnerabilities
|
|
||||||
safety>=2.3.0
|
|
||||||
types-python-dateutil>=2.8.0
|
|
||||||
types-PyYAML>=6.0.0
|
|
||||||
|
|
||||||
# Type stubs for common packages
|
|
||||||
types-requests>=2.31.0
|
|
||||||
types-setuptools>=69.0.0
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# CODE QUALITY & DEAD CODE DETECTION
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
# Vulture - Find dead code
|
|
||||||
vulture>=2.10
|
|
||||||
|
|
||||||
# xenon - Monitor code complexity
|
|
||||||
xenon>=0.9.0
|
|
||||||
|
|
||||||
# yapf - Yet Another Python Formatter (Google's formatter)
|
|
||||||
yapf>=0.40.0
|
|
||||||
@ -1,21 +0,0 @@
|
|||||||
aiohttp>=3.9
|
|
||||||
beautifulsoup4>=4.0
|
|
||||||
berserk>=0.13
|
|
||||||
bottle>=0.12
|
|
||||||
genanki>=0.13
|
|
||||||
geopandas>=1.0
|
|
||||||
howlongtobeatpy>=1.0
|
|
||||||
lxml>=5.0
|
|
||||||
|
|
||||||
# Optional dependencies for specific scripts (needed for full pylint analysis)
|
|
||||||
matplotlib>=3.0
|
|
||||||
mitmproxy>=10.0
|
|
||||||
numpy>=1.20
|
|
||||||
opencv-python>=4.0
|
|
||||||
pillow>=10.0
|
|
||||||
pygame>=2.0
|
|
||||||
pytest>=7.0
|
|
||||||
python-chess>=1.999
|
|
||||||
requests>=2.0
|
|
||||||
selenium>=4.0
|
|
||||||
websockets>=13.0
|
|
||||||
1
requirements.txt
Symbolic link
1
requirements.txt
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
meta/requirements.txt
|
||||||
148
run.sh
148
run.sh
@ -1,148 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# Easy entrypoint for system usage reports and polling script diagnostics.
|
|
||||||
# Usage:
|
|
||||||
# ./run.sh # today's report to stdout
|
|
||||||
# ./run.sh --date 20260501 # specific day
|
|
||||||
# ./run.sh --top 25 # override row count
|
|
||||||
# ./run.sh --profile [duration] # profile polling scripts (default 60s)
|
|
||||||
# ./run.sh --diagnose # find inefficient shell scripts
|
|
||||||
# ./run.sh --init-artifacts ... # bootstrap contract/evidence/session artifacts
|
|
||||||
#
|
|
||||||
# Any other args are forwarded to usage_report.py unchanged.
|
|
||||||
|
|
||||||
# Abort on command errors, unset variables and failed pipeline stages.
set -euo pipefail

# Resolve the directory that holds this script so the helper paths below do
# not depend on the caller's working directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPORT_SCRIPT="${SCRIPT_DIR}/linux_configuration/scripts/system-maintenance/bin/usage_report.py"
ARTIFACT_INIT_SCRIPT="${SCRIPT_DIR}/scripts/init_agent_artifacts.sh"

# Both helper scripts are checked up front, whichever mode is requested later.
[[ -f "$REPORT_SCRIPT" ]] || {
    echo "Error: usage_report.py not found at: $REPORT_SCRIPT" >&2
    exit 1
}

[[ -f "$ARTIFACT_INIT_SCRIPT" ]] || {
    echo "Error: init_agent_artifacts.sh not found at: $ARTIFACT_INIT_SCRIPT" >&2
    exit 1
}
|
|
||||||
|
|
||||||
# Profiling mode: trace fork-heavy scripts over time
|
|
||||||
profile_polling_scripts() {
    # Run strace in summary mode (-c) on clone/execve calls for a fixed
    # duration, then print the summary rows that mention those syscalls.
    #   $1 - tracing duration in seconds, passed to timeout (default: 60)
    local duration="${1:-60}"
    local trace_file

    echo "=== Polling Script Profiler (${duration}s) ===" >&2
    echo "Tracing fork/exec calls in shell scripts..." >&2
    echo "" >&2

    # Use mktemp instead of a PID-derived name: the file is created exclusively
    # with an unpredictable name, so it cannot collide with (or be pre-planted
    # as) an existing file in the shared temp directory.
    trace_file="$(mktemp "${TMPDIR:-/tmp}/polling_trace.XXXXXX")"

    # NOTE(review): -p $$ attaches strace to this script's own shell rather
    # than to other polling processes — confirm that is the intended target.
    # A failing or timed-out strace is tolerated so the summary step still runs.
    {
        timeout "$duration" strace -f -e trace=clone,execve -c -p $$ || true
    } > "$trace_file" 2>&1

    echo "Trace completed. Analyzing results:" >&2
    echo "" >&2

    # Show at most 20 fork/exec rows; finding none is not an error.
    grep -e "execve" -e "clone" "$trace_file" | head -20 || true

    rm -f "$trace_file"
}
|
|
||||||
|
|
||||||
# Diagnostic mode: find inefficient patterns in shell scripts
|
|
||||||
# Print (to stdout) at most five grep hits from the *.sh files under SCRIPT_DIR.
# All arguments are passed to grep ahead of the search root, e.g. `-e PATTERN`.
# The .git directory is skipped and binary files are ignored; finding nothing
# is not an error.
_diagnose_grep_sh() {
    grep -rI --include="*.sh" --exclude-dir=.git "$@" "$SCRIPT_DIR" 2>/dev/null \
        | head -5 || true
}

# Audit the shell scripts under SCRIPT_DIR for five polling anti-patterns.
# Section headings and recommendations go to stderr; matches go to stdout.
diagnose_polling_scripts() {
    echo "=== Shell Script Efficiency Audit ===" >&2
    echo "" >&2

    # Check for common anti-patterns
    echo "Checking for anti-patterns in shell scripts..." >&2
    echo "" >&2

    # Pattern 1: while true with sleep (no event-driven check)
    echo "1. Polling loops (while true + sleep):" >&2
    _diagnose_grep_sh -e "while true\|while :"
    echo "" >&2

    # Pattern 2: $(date +...) calls in loops (fork-heavy)
    echo "2. Excessive date calls (each forks a process):" >&2
    _diagnose_grep_sh -e '\$(date'
    echo "" >&2

    # Pattern 3: pgrep/xdotool in loops
    echo "3. Process inspection in loops (pgrep, xdotool):" >&2
    _diagnose_grep_sh -e "while.*pgrep\|while.*xdotool\|pgrep.*while"
    echo "" >&2

    # Pattern 4: pipes in hot paths.
    # Stage 1 lists the FILES (-l) that contain "while true", NUL-separated so
    # names with spaces survive xargs; stage 2 keeps those that also pipe into
    # awk/grep/tr. -r stops xargs from running grep when stage 1 finds nothing.
    echo "4. Heavy pipes in polling scripts (| awk, | grep, | tr):" >&2
    grep -rlIZ --include="*.sh" --exclude-dir=.git -e "while true" "$SCRIPT_DIR" 2>/dev/null \
        | xargs -0 -r grep -l -e " | awk" -e " | grep" -e " | tr" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    # Pattern 5: sleep with very short intervals
    echo "5. Aggressive polling (sleep < 1s):" >&2
    _diagnose_grep_sh -E -e "sleep 0\.[0-9]|sleep 0[^0-9]"
    echo "" >&2

    echo "=== Recommendations ===" >&2
    echo "1. Replace 'while true + sleep' with event-driven I/O (inotifywait, read -t, etc.)" >&2
    echo "2. Use /proc and /sys instead of forking date, sensors, acpi, etc." >&2
    echo "3. Cache frequently accessed values (e.g., in /tmp state files)" >&2
    echo "4. Use bash builtins: printf %()T instead of date, \${var//} instead of tr, etc." >&2
    echo "5. Use i3blocks interval=persist + event loop instead of polling mode" >&2
    echo "6. Increase polling intervals: 1s → 5s → 10s where acceptable" >&2
}
|
|
||||||
|
|
||||||
# Handle special modes
|
|
||||||
# Dispatch on the first argument; anything not handled here falls through to
# the usage report below.
case "${1:-}" in
    --profile)
        profile_polling_scripts "${2:-60}"
        exit 0
        ;;
    --diagnose)
        diagnose_polling_scripts
        exit 0
        ;;
    --init-artifacts)
        # Drop the flag and hand the remaining arguments to the bootstrap script.
        shift
        exec "$ARTIFACT_INIT_SCRIPT" "$@"
        ;;
    --help)
        # Print only the leading comment header of this script: skip the
        # shebang, strip the "# " prefix, and stop at the first non-comment
        # line so internal comments further down are not shown as help text.
        awk 'NR == 1 && /^#!/ { next }
             /^#/ { sub(/^# ?/, ""); print; next }
             { exit }' "$0"
        exit 0
        ;;
esac

# Default: run usage_report.py with all remaining args
exec python3 "$REPORT_SCRIPT" "$@"
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# Check that every directory containing C/C++ source files has a Makefile and run.sh.
|
|
||||||
# Used as a pre-commit hook; receives staged file paths as arguments.
|
|
||||||
|
|
||||||
set -uo pipefail

problems=()
declare -A seen_dirs

for staged_path in "$@"; do
    parent=$(dirname "$staged_path")

    # Ignore anything that lives under a path component named exactly "build".
    if grep -qE '(^|/)build(/|$)' <<<"$parent"; then
        continue
    fi

    # Each directory is inspected once, however many of its files were passed.
    if [[ -v seen_dirs["$parent"] ]]; then
        continue
    fi
    seen_dirs["$parent"]=1

    # Accept either spelling of the build file: Makefile or makefile.
    compgen -G "$parent/[Mm]akefile" > /dev/null 2>&1 \
        || problems+=("MISSING Makefile in: $parent")

    # A run.sh must sit next to the sources as well.
    [[ -f "$parent/run.sh" ]] || problems+=("MISSING run.sh in: $parent")
done

# Report every problem at once and fail the hook if there was any.
if (( ${#problems[@]} > 0 )); then
    printf 'C/C++ build file check failed:\n'
    printf ' %s\n' "${problems[@]}"
    printf '\nEvery directory with .c/.cpp files must have a Makefile and run.sh.\n'
    exit 1
fi

exit 0
|
|
||||||
33
setup.sh
33
setup.sh
@ -1,33 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# Post-clone setup script for testsAndMisc repository.
|
|
||||||
# Run once after cloning: ./setup.sh
|
|
||||||
|
|
||||||
set -euo pipefail

# Always operate from the top of the working tree, wherever we were invoked.
top_level="$(git rev-parse --show-toplevel)"
cd "$top_level"

printf 'Configuring git hooks path...\n'
git config core.hooksPath linux_configuration/.githooks
printf ' ✓ core.hooksPath set to linux_configuration/.githooks\n'

# Collect the lint tools (used by pre-commit hooks) that are not on PATH.
absent=()
for tool in clang-format cppcheck flawfinder shellcheck node npx; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        absent+=("$tool")
    fi
done

if (( ${#absent[@]} == 0 )); then
    printf ' ✓ All lint tools available\n'
else
    printf '\n⚠ Missing tools for pre-commit hooks: %s\n' "${absent[*]}"
    # NOTE(review): the hints below reuse command names as package names;
    # confirm they match the distribution's actual package names.
    if command -v pacman >/dev/null 2>&1; then
        printf ' Install with: sudo pacman -S --needed %s\n' "${absent[*]}"
    elif command -v apt-get >/dev/null 2>&1; then
        printf ' Install with: sudo apt-get install %s\n' "${absent[*]}"
    else
        printf ' Please install: %s\n' "${absent[*]}"
    fi
fi

printf '\nSetup complete.\n'
|
|
||||||
Loading…
Reference in New Issue
Block a user