diff --git a/.binary-allowlist b/.binary-allowlist deleted file mode 100644 index 93631ab..0000000 --- a/.binary-allowlist +++ /dev/null @@ -1,3 +0,0 @@ -# Binary files allowed in the repository. -# One glob pattern per line. These are essential for builds and cannot be external. -# Lines starting with # are comments. diff --git a/.fvmrc b/.fvmrc deleted file mode 100644 index 5a4e0c2..0000000 --- a/.fvmrc +++ /dev/null @@ -1,3 +0,0 @@ -{ - "flutter": "stable" -} diff --git a/.fvmrc b/.fvmrc new file mode 120000 index 0000000..f818eae --- /dev/null +++ b/.fvmrc @@ -0,0 +1 @@ +meta/.fvmrc \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4145c66..159eceb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ # See http://help.github.com/ignore-files/ for more about ignoring files. +# Sensitive — must NEVER be committed (contains regex of home GPS coordinates etc.) +.secret-patterns + # Compiled output /dist /tmp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 0210df1..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,449 +0,0 @@ -# ============================================================================== -# Pre-commit Configuration - Multi-language Linting & Formatting -# ============================================================================== -# Install: pre-commit install && pre-commit install --hook-type pre-push -# Fast lint: pre-commit run --all-files (linters only, ~10 s) -# Full suite: pre-commit run --all-files --hook-stage pre-push (+ tests) -# Update hooks: pre-commit autoupdate -# ============================================================================== - -# Global settings -default_language_version: - python: python3 - -# Fail fast on first error (set to false to see all errors) -fail_fast: false - -# Configuration -ci: - autofix_commit_msg: "style: auto-fix by pre-commit hooks" - autoupdate_commit_msg: "chore: update pre-commit hooks" - -repos: - # 
=========================================================================== - # GENERAL HOOKS - File formatting and validation - # =========================================================================== - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 - hooks: - - id: trailing-whitespace - args: [--markdown-linebreak-ext=md] - - id: end-of-file-fixer - - id: check-yaml - args: [--unsafe] - - id: check-json - # Exclude JSONC files (VS Code configs, TypeScript configs) and compile_commands.json - exclude: ^(\.vscode/|.*/\.vscode/|C/compile_commands\.json|.*tsconfig.*\.json) - - id: check-toml - - id: check-xml - - id: check-added-large-files - args: [--maxkb=2000] - - id: check-merge-conflict - - id: check-case-conflict - - id: check-symlinks - - id: check-executables-have-shebangs - - id: check-shebang-scripts-are-executable - - id: detect-private-key - - id: debug-statements - - id: name-tests-test - args: [--pytest-test-first] - - id: check-ast - - id: check-builtin-literals - - id: check-docstring-first - - id: fix-byte-order-marker - - id: mixed-line-ending - args: [--fix=lf] - - id: requirements-txt-fixer - - # =========================================================================== - # BINARY BLOCKER - Prevent binary/image files from being committed - # =========================================================================== - - repo: local - hooks: - - id: no-binaries - name: Block binary/image files - entry: scripts/check_no_binaries.sh - language: script - always_run: false - - id: ai-evidence-contract - name: Require AI evidence artifacts for code changes - entry: scripts/check_ai_evidence.sh - language: script - pass_filenames: false - always_run: true - - id: ai-multifile-contract - name: Require workflow contract for multi-file code changes - entry: scripts/check_agent_contract.sh - language: script - pass_filenames: false - always_run: true - - id: append-only-sessions - name: Enforce append-only session logs - entry: 
scripts/check_append_only_sessions.sh - language: script - pass_filenames: false - always_run: true - - # =========================================================================== - # POLLING SCRIPT LINTER - Detect fork-storm anti-patterns in shell scripts - # =========================================================================== - - repo: local - hooks: - - id: no-polling-antipatterns - name: Block polling script anti-patterns - entry: scripts/check_polling_antipatterns.sh - language: script - types: [shell] - exclude: ^(\.git/|C/|CPP/|phone_focus_mode/lib/tests/|tests/) - - # =========================================================================== - # NOQA BLOCKER - Zero tolerance for noqa/type:ignore suppression comments - # =========================================================================== - - repo: local - hooks: - - id: no-noqa - name: Block noqa comments - entry: '(?i)#\s*(noqa|type:\s*ignore)' - language: pygrep - types: [python] - - id: no-ruff-noqa - name: Block ruff noqa file-level comments - entry: '(?i)#\s*ruff:\s*noqa' - language: pygrep - types: [python] - - # =========================================================================== - # RUFF - Fast Python linter and formatter (replaces black, isort, flake8, etc.) 
- # =========================================================================== - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.2 - hooks: - # Linter - run first to catch issues - - id: ruff - args: - - --fix - - --unsafe-fixes - - --exit-non-zero-on-fix - - --show-fixes - types_or: [python, pyi] - # Formatter - run after linting - - id: ruff-format - types_or: [python, pyi] - - # =========================================================================== - # MYPY - Static type checking (runs on push only for speed) - # =========================================================================== - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.13.0 - hooks: - - id: mypy - stages: [pre-push] - args: - - --ignore-missing-imports - - --no-error-summary - - --disable-error-code=no-untyped-def - - --disable-error-code=no-untyped-call - - --disable-error-code=var-annotated - - --disable-error-code=no-any-unimported - - --disable-error-code=type-arg - - --disable-error-code=no-any-return - - --disable-error-code=misc - - --disable-error-code=unused-ignore - - --disable-error-code=unreachable - - --disable-error-code=assignment - - --disable-error-code=no-redef - - --disable-error-code=attr-defined - - --disable-error-code=arg-type - - --disable-error-code=union-attr - - --disable-error-code=call-overload - - --disable-error-code=return-value - - --disable-error-code=redundant-cast - - --disable-error-code=empty-body - - --disable-error-code=list-item - exclude: >- - (?x)^( - Bash/.*| - \.venv/.*| - linux_configuration/scripts/misc/testsAndMisc-bash/tools/.* - )$ - additional_dependencies: - - types-requests - - types-PyYAML - - types-python-dateutil - - # =========================================================================== - # PYLINT - Comprehensive Python linter (runs on push only for speed) - # =========================================================================== - - repo: https://github.com/pylint-dev/pylint - rev: v3.3.2 - 
hooks: - - id: pylint - stages: [pre-push] - args: - - --rcfile=pyproject.toml - - --fail-under=8.0 - - --jobs=0 - additional_dependencies: - - pytest - - python-chess - - requests - - pygame - exclude: ^(Bash/|\.venv/) - - # =========================================================================== - # BANDIT - Security linter (runs on push only for speed) - # =========================================================================== - - repo: https://github.com/PyCQA/bandit - rev: 1.7.10 - hooks: - - id: bandit - stages: [pre-push] - args: - - -c - - pyproject.toml - - --severity-level=high - - --confidence-level=medium - - --skip=B113 - additional_dependencies: ["bandit[toml]"] - exclude: ^(Bash/|\.venv/|tests/|.*test.*\.py$) - - # =========================================================================== - # PYTEST + COVERAGE - Run tests and enforce 100% code coverage - # Only tests for subpackages with changed files are run (see script). - # Runs on push only (slow); use --hook-stage pre-push to run manually. 
- # =========================================================================== - - repo: local - hooks: - - id: pytest-coverage - name: pytest with coverage enforcement - entry: python scripts/pytest_changed_packages.py - language: system - types: [python] - pass_filenames: true - stages: [pre-push] - - # =========================================================================== - # VULTURE - Dead code detection (disabled - doesn't work well with pre-commit) - # =========================================================================== - # - repo: https://github.com/jendrikseipp/vulture - # rev: v2.13 - # hooks: - # - id: vulture - # args: - # - --min-confidence=80 - # - --exclude=.venv,Bash,__pycache__ - # exclude: ^(Bash/|\.venv/) - - # =========================================================================== - # PYUPGRADE - Upgrade Python syntax (disabled - incompatible with Python 3.14) - # =========================================================================== - # - repo: https://github.com/asottile/pyupgrade - # rev: v3.19.0 - # hooks: - # - id: pyupgrade - # args: - # - --py310-plus - - # =========================================================================== - # CODESPELL - Spell checking in code (expanded ignore list for non-English) - # =========================================================================== - - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 - hooks: - - id: codespell - args: - - --skip=*.json,*.lock,*.min.js,*.min.css,.git,__pycache__,.venv,*.txt - - --ignore-words-list=als,ans,ect,nd,som,sur,te,nam,numer,lew,sie,wil,postion,clen,ther,folow,derrive,ony,tje,noe,theses,crate,doubleclick,wile,tabel,pary,blok,bloc,proces,serwer,parametr,adres,hart,dout,metod,tekst,synonim,grup,mosty,lokal,skalar,milion,nowe,tre,hel,alph - exclude: ^(Bash/ffmpeg-build/|LaTeX/|CPP/|.*\.geojson$) - - # =========================================================================== - # DOCFORMATTER - Format docstrings (disabled - 
causes recursion errors) - # =========================================================================== - # - repo: local - # hooks: - # - id: docformatter - # name: docformatter - # entry: docformatter - # language: system - # types: [python] - # args: - # - --in-place - # - --wrap-summaries=88 - # - --wrap-descriptions=88 - - # =========================================================================== - # INTERROGATE - Docstring coverage (disabled - causes recursion on large files) - # =========================================================================== - # - repo: https://github.com/econchick/interrogate - # rev: 1.7.0 - # hooks: - # - id: interrogate - # args: - # - --fail-under=0 - # - --verbose - # - --ignore-init-method - # - --ignore-init-module - # - --ignore-magic - # - --ignore-private - # - --ignore-semiprivate - # - --exclude=Bash,.venv,__pycache__ - # pass_filenames: false - - # =========================================================================== - # AUTOFLAKE - Remove unused imports/variables - # Disabled: fully redundant with ruff (F401, F841, F811) + --fix - # =========================================================================== - # - repo: https://github.com/PyCQA/autoflake - # rev: v2.3.1 - # hooks: - # - id: autoflake - # args: - # - --in-place - # - --remove-all-unused-imports - # - --remove-unused-variables - # - --remove-duplicate-keys - # - --expand-star-imports - - # =========================================================================== - # SAFETY - Check for security vulnerabilities in dependencies - # =========================================================================== - # Note: Safety requires API key for full functionality, disabled by default - # - repo: https://github.com/Lucas-C/pre-commit-hooks-safety - # rev: v1.3.2 - # hooks: - # - id: python-safety-dependencies-check - # files: requirements.*\.txt$ - - # =========================================================================== - # PYRIGHT - 
Microsoft's type checker (very strict, optional) - # =========================================================================== - # Uncomment to enable - can be slow and very strict - # - repo: https://github.com/RobertCraiworthy/pyright-action - # rev: v1.1.350 - # hooks: - # - id: pyright - - # =========================================================================== - # CHECK JSON/YAML/TOML formatting (runs on push only — slow Node.js startup) - # =========================================================================== - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v4.0.0-alpha.8 - hooks: - - id: prettier - types_or: [yaml, json, markdown] - exclude: ^(Bash/|\.venv/|.*\.lock$|C/compile_commands\.json) - stages: [pre-push] - - # =========================================================================== - # SHELLCHECK - Shell script linting - # Wrapper batches files to avoid OOM on large repos. - # =========================================================================== - - repo: local - hooks: - - id: shellcheck - name: shellcheck - entry: bash -c 'printf "%s\0" "$@" | xargs -0 -n 40 shellcheck --severity=warning' -- - language: system - types: [shell] - - # =========================================================================== - # CLANG-FORMAT - C/C++ code formatting - # =========================================================================== - - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.6 - hooks: - - id: clang-format - types_or: [c, c++] - - # =========================================================================== - # CPPCHECK - C/C++ static analysis - # =========================================================================== - - repo: local - hooks: - - id: cppcheck - name: cppcheck - entry: cppcheck - language: system - types_or: [c, c++] - args: - - --enable=warning,portability - - --force - - --quiet - - --error-exitcode=1 - - --inline-suppr - - --suppress=missingIncludeSystem - - 
--suppress=syntaxError - - --suppress=nullPointerOutOfResources - - --suppress=ctunullpointerOutOfResources - - --suppress=ctunullpointerOutOfMemory - - --std=c11 - - # =========================================================================== - # FLAWFINDER - C/C++ security scanner - # =========================================================================== - - repo: local - hooks: - - id: flawfinder - name: flawfinder - entry: flawfinder - language: system - types_or: [c, c++] - args: - - --error-level=5 - - --quiet - - --columns - - # =========================================================================== - # CHECK C/C++ BUILD FILES - Ensure every C/C++ dir has Makefile and run.sh - # =========================================================================== - - repo: local - hooks: - - id: check-c-cpp-build-files - name: check C/C++ dirs have Makefile and run.sh - entry: scripts/check_c_cpp_build_files.sh - language: script - types_or: [c, c++] - - # =========================================================================== - # CHECK PYTHON LOCATION - All Python files must be under python_pkg/ - # =========================================================================== - - repo: local - hooks: - - id: check-python-location - name: check Python files are under python_pkg/ - entry: scripts/check_python_location.sh - language: script - types: [python] - - # =========================================================================== - # REMOVE EMPTY DIRECTORIES - Clean up empty folders in the repo - # =========================================================================== - - repo: local - hooks: - - id: remove-empty-dirs - name: remove empty directories - entry: find . 
-type d -empty -not -path './.git/*' -delete -print - language: system - pass_filenames: false - always_run: true - - # =========================================================================== - # SECRET PATTERNS - Block commits containing sensitive data - # =========================================================================== - - repo: local - hooks: - - id: check-no-secrets - name: check for leaked secrets - entry: scripts/check_no_secrets.sh - language: script - exclude: ^(\.secret-patterns|\.pre-commit-config\.yaml|.*\.geojson)$ - - # =========================================================================== - # COMMITIZEN - Conventional commits (optional) - # =========================================================================== - # - repo: https://github.com/commitizen-tools/commitizen - # rev: v3.13.0 - # hooks: - # - id: commitizen - # - id: commitizen-branch - # stages: [push] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 120000 index 0000000..4e101de --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1 @@ +meta/.pre-commit-config.yaml \ No newline at end of file diff --git a/C/.clang-format b/C/.clang-format deleted file mode 100644 index 714fd81..0000000 --- a/C/.clang-format +++ /dev/null @@ -1,10 +0,0 @@ -BasedOnStyle: LLVM -IndentWidth: 4 -TabWidth: 4 -UseTab: Never -ColumnLimit: 100 -SortIncludes: true -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: true -AllowShortIfStatementsOnASingleLine: false -BreakBeforeBraces: Allman diff --git a/C/.clang-tidy b/C/.clang-tidy deleted file mode 100644 index 0aa3fd4..0000000 --- a/C/.clang-tidy +++ /dev/null @@ -1,18 +0,0 @@ -Checks: > - clang-analyzer-*, - -clang-analyzer-security.*, - bugprone-*, - cert-err33-c, - cert-err34-c, - cert-fio38-c, - performance-*, - portability-*, - misc-unused-parameters -WarningsAsErrors: > - clang-analyzer-*, - bugprone-*, - cert-err33-c, - cert-err34-c, - cert-fio38-c -HeaderFilterRegex: ".*" -FormatStyle: none 
diff --git a/C/.gitignore b/C/.gitignore deleted file mode 100644 index ba0a89c..0000000 --- a/C/.gitignore +++ /dev/null @@ -1 +0,0 @@ -random_engine diff --git a/C/atop_agg/Makefile b/C/atop_agg/Makefile deleted file mode 100644 index 7317260..0000000 --- a/C/atop_agg/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -CC := gcc -CFLAGS := -O2 -std=c11 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Wno-unused-parameter -COV := -O0 -g --coverage -std=c11 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Wno-unused-parameter -DATOP_AGG_NO_MAIN - -SRC := atop_agg.c -HDR := atop_agg.h -BIN := atop_agg - -.PHONY: all clean rebuild test coverage - -all: $(BIN) - -$(BIN): $(SRC) $(HDR) - $(CC) $(CFLAGS) -o $@ $(SRC) - -test_atop_agg: test_atop_agg.c atop_agg.c atop_agg.h - $(CC) $(COV) -o test_atop_agg test_atop_agg.c atop_agg.c - -test: test_atop_agg - ./test_atop_agg - -coverage: test_atop_agg - ./test_atop_agg - lcov --capture --directory . --output-file coverage.info --no-external - lcov --remove coverage.info '*/test_atop_agg.c' --output-file coverage.info - genhtml coverage.info --output-directory coverage_html - @echo "Coverage report at coverage_html/index.html" - -clean: - rm -f $(BIN) test_atop_agg *.o *.gcda *.gcno coverage.info - rm -rf coverage_html - -rebuild: clean all diff --git a/C/atop_agg/atop_agg.c b/C/atop_agg/atop_agg.c deleted file mode 100644 index 276b5cf..0000000 --- a/C/atop_agg/atop_agg.c +++ /dev/null @@ -1,474 +0,0 @@ -/* - * atop_agg — fast per-PID aggregator for `atop -P PRC,PRM` output. - * - * Reads atop parseable output on stdin, folds it into per-PID CPU-tick - * and RSS trackers, and prints a compact TSV summary on stdout that a - * higher-level driver (Python) then name-folds into human-readable - * tables. This avoids the ~3s Python parse cost on a typical day's - * 1.7M-line atop dump; the C hot loop completes in well under a second - * so the pipeline runs at atop's own ~2s wall-clock floor. 
- * - * Output TSV lines: - * Wstart_epochend_epochdistinct_samplesmedian_interval - * Cpidnamedelta_ticks - * Rpidnamepeak_kbsum_kbsamples - */ -#include "atop_agg.h" - -#include -#include -#include -#include -#include - -/* - * A real-world day of atop on a dev box can see >700k distinct PIDs - * because every short-lived compiler/shell subprocess gets a fresh ID. - * 2M slots keeps the load factor below ~40% for that workload, keeping - * linear-probe chains short without dynamic resizing. - */ -#define HASH_CAP_BITS 21 -#define HASH_CAP (1u << HASH_CAP_BITS) -#define HASH_MASK (HASH_CAP - 1u) -#define MAX_EPOCHS 4096 -#define MAX_TOKENS 64 - -/* Knuth multiplicative hash → index in an open-addressed table. */ -static unsigned int hash_pid(int pid) -{ - unsigned int k = (unsigned int)pid; - return (k * 2654435761u) >> (32 - HASH_CAP_BITS); -} - -static PidCpu *cpu_slot(State *s, int pid) -{ - unsigned int h = hash_pid(pid); - for (unsigned int probes = 0; probes < HASH_CAP; probes++, h++) - { - PidCpu *slot = &s->cpu[h & HASH_MASK]; - if (slot->pid == pid) - { - return slot; - } - if (slot->pid == 0) - { - slot->pid = pid; - slot->first_ticks = -1; - slot->last_ticks = 0; - slot->samples = 0; - slot->name[0] = '\0'; - return slot; - } - } - /* Table full — drop the sample rather than loop forever. */ - return NULL; -} - -static PidRam *ram_slot(State *s, int pid) -{ - unsigned int h = hash_pid(pid); - for (unsigned int probes = 0; probes < HASH_CAP; probes++, h++) - { - PidRam *slot = &s->ram[h & HASH_MASK]; - if (slot->pid == pid) - { - return slot; - } - if (slot->pid == 0) - { - slot->pid = pid; - slot->peak_kb = 0; - slot->sum_kb = 0; - slot->samples = 0; - slot->name[0] = '\0'; - return slot; - } - } - return NULL; -} - -static void add_epoch(State *s, long epoch) -{ - /* Linear scan — there are only a few dozen distinct epochs per log. 
*/ - for (int i = 0; i < s->n_epochs; i++) - { - if (s->epochs[i] == epoch) - { - return; - } - } - if (s->n_epochs < MAX_EPOCHS) - { - s->epochs[s->n_epochs++] = epoch; - } -} - -/* - * Tokenise a whitespace-separated line in place. Fills *tokens* with - * pointers into *line* and returns the token count. A process name - * wrapped in parentheses is rejoined into a single token with spaces - * preserved (atop emits `(Web Content)` as three whitespace-split - * tokens, which we merge back). - */ -int tokenize_line(char *line, char **tokens, int max_tokens) -{ - int n = 0; - char *p = line; - while (*p && n < max_tokens) - { - while (*p == ' ' || *p == '\t') - { - p++; - } - if (!*p || *p == '\n') - { - break; - } - char *start = p; - if (*p == '(') - { - /* Consume through the matching ')', preserving interior spaces. */ - while (*p && *p != ')') - { - p++; - } - if (*p == ')') - { - p++; - } - } - else - { - while (*p && *p != ' ' && *p != '\t' && *p != '\n') - { - p++; - } - } - if (*p) - { - *p = '\0'; - p++; - } - tokens[n++] = start; - } - return n; -} - -/* - * Copy *src* into *dst* (capacity *cap*), stripping a leading '(' and - * trailing ')' if both are present. Always null-terminates. If the - * resulting name is empty, writes "unknown". - */ -void copy_name(char *dst, size_t cap, const char *src) -{ - size_t len = strlen(src); - size_t start = 0; - if (len >= 2 && src[0] == '(' && src[len - 1] == ')') - { - start = 1; - len -= 2; - } - if (len == 0) - { - const char *fallback = "unknown"; - size_t flen = strlen(fallback); - if (flen >= cap) - { - flen = cap - 1; - } - memcpy(dst, fallback, flen); - dst[flen] = '\0'; - return; - } - if (len >= cap) - { - len = cap - 1; - } - memcpy(dst, src + start, len); - dst[len] = '\0'; -} - -/* - * Parse one PRC/PRM line and update *s*. Unknown labels and malformed - * records are silently skipped (atop emits a stable schema, but guard - * against future changes and header/separator lines). 
- */ -void process_line(char *line, State *s) -{ - char *tokens[MAX_TOKENS]; - int n = tokenize_line(line, tokens, MAX_TOKENS); - if (n < 11) - { - return; - } - const char *label = tokens[0]; - int is_prc = (label[0] == 'P' && label[1] == 'R' && label[2] == 'C' && label[3] == '\0'); - int is_prm = (label[0] == 'P' && label[1] == 'R' && label[2] == 'M' && label[3] == '\0'); - if (!is_prc && !is_prm) - { - return; - } - long epoch = strtol(tokens[2], NULL, 10); - int pid = (int)strtol(tokens[6], NULL, 10); - if (pid <= 0) - { - return; - } - const char *name_tok = tokens[7]; - if (is_prc) - { - long utime = strtol(tokens[9], NULL, 10); - long stime = strtol(tokens[10], NULL, 10); - long ticks = utime + stime; - add_epoch(s, epoch); - PidCpu *slot = cpu_slot(s, pid); - if (slot == NULL) - { - return; - } - if (slot->first_ticks < 0) - { - slot->first_ticks = ticks; - } - slot->last_ticks = ticks; - slot->samples++; - copy_name(slot->name, sizeof(slot->name), name_tok); - return; - } - /* PRM */ - if (n < 12) - { - return; - } - long rsize_kb = strtol(tokens[11], NULL, 10); - PidRam *slot = ram_slot(s, pid); - if (slot == NULL) - { - return; - } - if (rsize_kb > slot->peak_kb) - { - slot->peak_kb = rsize_kb; - } - slot->sum_kb += rsize_kb; - slot->samples++; - copy_name(slot->name, sizeof(slot->name), name_tok); -} - -static int cmp_long(const void *a, const void *b) -{ - long la = *(const long *)a; - long lb = *(const long *)b; - if (la < lb) - { - return -1; - } - if (la > lb) - { - return 1; - } - return 0; -} - -/* FNV-1a 32-bit over a NUL-terminated string; used to key the name table. */ -static unsigned int fnv1a(const char *s) -{ - unsigned int h = 2166136261u; - while (*s) - { - h ^= (unsigned char)*s++; - h *= 16777619u; - } - return h; -} - -/* - * Per-name aggregate, built in a second pass over cpu/ram tables so that - * the caller only has to parse a few thousand output rows instead of one - * row per PID. 
The name table is deliberately oversized (64k slots for an - * expected few-thousand names) to keep linear-probe chains short. - */ -#define NAME_CAP_BITS 16 -#define NAME_CAP (1u << NAME_CAP_BITS) -#define NAME_MASK (NAME_CAP - 1u) - -typedef struct -{ - char name[ATOP_AGG_NAME_MAX]; - long cpu_ticks; - int cpu_pids; - long peak_kb; - long sum_avg_kb; - int rss_samples; - int ram_pids; - char used; -} NameAgg; - -static NameAgg *name_slot(NameAgg *table, const char *name) -{ - unsigned int h = fnv1a(name); - for (unsigned int probes = 0; probes < NAME_CAP; probes++, h++) - { - NameAgg *slot = &table[h & NAME_MASK]; - if (!slot->used) - { - slot->used = 1; - /* copy_name already enforced \0-termination on the source. */ - size_t i = 0; - while (name[i] && i + 1 < sizeof(slot->name)) - { - slot->name[i] = name[i]; - i++; - } - slot->name[i] = '\0'; - return slot; - } - if (strcmp(slot->name, name) == 0) - { - return slot; - } - } - return NULL; -} - -/* Write the aggregated summary to *out* in the documented TSV schema. 
*/ -void emit_results(State *s, FILE *out) -{ - long start_epoch = 0; - long end_epoch = 0; - long median_interval = 0; - if (s->n_epochs > 0) - { - qsort(s->epochs, (size_t)s->n_epochs, sizeof(long), cmp_long); - start_epoch = s->epochs[0]; - end_epoch = s->epochs[s->n_epochs - 1]; - if (s->n_epochs >= 2) - { - long deltas[MAX_EPOCHS]; - for (int i = 0; i < s->n_epochs - 1; i++) - { - deltas[i] = s->epochs[i + 1] - s->epochs[i]; - } - qsort(deltas, (size_t)(s->n_epochs - 1), sizeof(long), cmp_long); - median_interval = deltas[(s->n_epochs - 1) / 2]; - } - } - fprintf(out, "W\t%ld\t%ld\t%d\t%ld\n", start_epoch, end_epoch, s->n_epochs, median_interval); - - NameAgg *names = calloc(NAME_CAP, sizeof(NameAgg)); - if (!names) - { - return; - } - for (unsigned int i = 0; i < HASH_CAP; i++) - { - PidCpu *slot = &s->cpu[i]; - if (slot->pid == 0) - { - continue; - } - long delta = slot->last_ticks; - if (slot->samples >= 2) - { - delta = slot->last_ticks - slot->first_ticks; - if (delta < 0) - { - delta = 0; - } - } - NameAgg *na = name_slot(names, slot->name); - if (!na) - { - continue; - } - na->cpu_ticks += delta; - na->cpu_pids++; - } - for (unsigned int i = 0; i < HASH_CAP; i++) - { - PidRam *slot = &s->ram[i]; - if (slot->pid == 0) - { - continue; - } - long avg_kb = slot->samples ? slot->sum_kb / slot->samples : 0; - NameAgg *na = name_slot(names, slot->name); - if (!na) - { - continue; - } - if (slot->peak_kb > na->peak_kb) - { - na->peak_kb = slot->peak_kb; - } - na->sum_avg_kb += avg_kb; - na->rss_samples++; - na->ram_pids++; - } - for (unsigned int i = 0; i < NAME_CAP; i++) - { - NameAgg *na = &names[i]; - if (!na->used) - { - continue; - } - int pids = na->cpu_pids > na->ram_pids ? 
na->cpu_pids : na->ram_pids; - fprintf(out, "N\t%s\t%ld\t%ld\t%ld\t%d\t%d\n", na->name, na->cpu_ticks, na->peak_kb, - na->sum_avg_kb, na->rss_samples, pids); - } - free(names); -} - -State *state_new(void) -{ - State *s = calloc(1, sizeof(State)); - if (!s) - { - return NULL; - } - s->cpu = calloc(HASH_CAP, sizeof(PidCpu)); - s->ram = calloc(HASH_CAP, sizeof(PidRam)); - s->epochs = calloc(MAX_EPOCHS, sizeof(long)); - if (!s->cpu || !s->ram || !s->epochs) - { - state_free(s); - return NULL; - } - s->n_epochs = 0; - return s; -} - -void state_free(State *s) -{ - if (!s) - { - return; - } - free(s->cpu); - free(s->ram); - free(s->epochs); - free(s); -} - -#ifndef ATOP_AGG_NO_MAIN -int main(void) -{ - State *s = state_new(); - if (!s) - { - fprintf(stderr, "atop_agg: out of memory\n"); - return 1; - } - char *line = NULL; - size_t cap = 0; - ssize_t got; - while ((got = getline(&line, &cap, stdin)) != -1) - { - process_line(line, s); - } - free(line); - emit_results(s, stdout); - state_free(s); - return 0; -} -#endif diff --git a/C/atop_agg/atop_agg.h b/C/atop_agg/atop_agg.h deleted file mode 100644 index 6503199..0000000 --- a/C/atop_agg/atop_agg.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef ATOP_AGG_H -#define ATOP_AGG_H - -#include - -/* NAME_MAX capped to keep slot size compact; typical atop comm is 15 chars. 
*/ -#define ATOP_AGG_NAME_MAX 40 - -typedef struct -{ - int pid; - char name[ATOP_AGG_NAME_MAX]; - long first_ticks; - long last_ticks; - int samples; -} PidCpu; - -typedef struct -{ - int pid; - char name[ATOP_AGG_NAME_MAX]; - long peak_kb; - long sum_kb; - int samples; -} PidRam; - -typedef struct -{ - PidCpu *cpu; - PidRam *ram; - long *epochs; - int n_epochs; -} State; - -State *state_new(void); -void state_free(State *s); -int tokenize_line(char *line, char **tokens, int max_tokens); -void copy_name(char *dst, size_t cap, const char *src); -void process_line(char *line, State *s); -void emit_results(State *s, FILE *out); - -#endif diff --git a/C/atop_agg/run.sh b/C/atop_agg/run.sh deleted file mode 100755 index fd2a98d..0000000 --- a/C/atop_agg/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -# Build and demo atop_agg on today's atop log. -set -euo pipefail -cd "$(dirname "$0")" -make -LOG="${1:-/var/log/atop/atop_$(date +%Y%m%d)}" -if [[ ! -f "$LOG" ]]; then - echo "No atop log at $LOG; pass a path as arg 1." >&2 - exit 1 -fi -echo "Aggregating $LOG ..." >&2 -atop -r "$LOG" -P PRC,PRM | ./atop_agg | head -20 diff --git a/C/atop_agg/test_atop_agg.c b/C/atop_agg/test_atop_agg.c deleted file mode 100644 index 3dd14c2..0000000 --- a/C/atop_agg/test_atop_agg.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Unit tests for atop_agg helpers. Compiled with --coverage; aims for - * 100% line coverage of atop_agg.c (excluding main, which is guarded - * by -DATOP_AGG_NO_MAIN). 
- */ -#include "atop_agg.h" - -#include -#include -#include -#include - -static int failures = 0; - -#define CHECK(cond) \ - do \ - { \ - if (!(cond)) \ - { \ - fprintf(stderr, "FAIL %s:%d: %s\n", __FILE__, __LINE__, #cond); \ - failures++; \ - } \ - } while (0) - -static void test_copy_name(void) -{ - char buf[16]; - copy_name(buf, sizeof(buf), "(bash)"); - CHECK(strcmp(buf, "bash") == 0); - - copy_name(buf, sizeof(buf), "bash"); - CHECK(strcmp(buf, "bash") == 0); - - copy_name(buf, sizeof(buf), "()"); - CHECK(strcmp(buf, "unknown") == 0); - - copy_name(buf, sizeof(buf), ""); - CHECK(strcmp(buf, "unknown") == 0); - - /* Truncation. */ - copy_name(buf, sizeof(buf), "(veryverylongnameabc)"); - CHECK(strlen(buf) == sizeof(buf) - 1); - - /* Fallback truncation: buf too small for "unknown" itself. */ - char tiny[4]; - copy_name(tiny, sizeof(tiny), ""); - CHECK(strcmp(tiny, "unk") == 0); -} - -static void test_tokenize(void) -{ - char line[] = "PRC host 1000 2026/01/01 12:00:00 600 123 (bash) S 10 20\n"; - char *toks[32]; - int n = tokenize_line(line, toks, 32); - CHECK(n == 11); - CHECK(strcmp(toks[0], "PRC") == 0); - CHECK(strcmp(toks[7], "(bash)") == 0); - CHECK(strcmp(toks[10], "20") == 0); - - /* Multi-word parenthesised name. */ - char line2[] = "PRM host 1000 d t 600 200 (Web Content) S 4096 1 2 0 0\n"; - char *t2[32]; - int n2 = tokenize_line(line2, t2, 32); - CHECK(n2 >= 12); - CHECK(strncmp(t2[7], "(Web Content)", 13) == 0); - - /* Empty / whitespace-only line. */ - char empty[] = " \n"; - char *t3[4]; - CHECK(tokenize_line(empty, t3, 4) == 0); - - /* Max-tokens cap respected. */ - char big[] = "a b c d e f g h i j k"; - char *t4[3]; - CHECK(tokenize_line(big, t4, 3) == 3); - - /* Unclosed paren at EOL — consumed to end. 
*/ - char unclosed[] = "(abc"; - char *t5[2]; - int n5 = tokenize_line(unclosed, t5, 2); - CHECK(n5 == 1); - CHECK(strcmp(t5[0], "(abc") == 0); -} - -static void test_process_and_emit(void) -{ - State *s = state_new(); - assert(s != NULL); - - /* Two PRC samples for PID 100: first utime+stime=30, last=100. - Delta should be 70. */ - char prc1[] = "PRC h 1000 d t 600 100 (cc1) S 10 20\n"; - char prc2[] = "PRC h 1600 d t 600 100 (cc1) S 70 30\n"; - process_line(prc1, s); - process_line(prc2, s); - - /* One PRM sample for PID 100: rss=4096 kB. */ - char prm1[] = "PRM h 1000 d t 600 100 (cc1) S 4096 100 4096 0 0\n"; - process_line(prm1, s); - - /* PRC sample for PID 200 seen only once → delta == last_ticks. */ - char prc3[] = "PRC h 1000 d t 600 200 (short) S 5 5\n"; - process_line(prc3, s); - - /* Header / separator / unknown label should be ignored. */ - char header[] = "# comment line\n"; - process_line(header, s); - char sep[] = "SEP\n"; - process_line(sep, s); - char other[] = "CPU h 1000 d t 600 0 0 0 0 0 0 0 0\n"; - process_line(other, s); - - /* Malformed: pid <= 0. */ - char bad_pid[] = "PRC h 1000 d t 600 0 (x) S 1 1\n"; - process_line(bad_pid, s); - - /* PRC short (<11 tokens) should not crash. */ - char prc_short[] = "PRC h 1000 d t 600 300 (y) S 1\n"; - process_line(prc_short, s); - - /* PRM short (<12 tokens) should not crash. */ - char prm_short[] = "PRM h 1000 d t 600 300 (y) S 4096 1 1 0\n"; - process_line(prm_short, s); - - /* Emit and sanity-check the output. */ - char *buf = NULL; - size_t sz = 0; - FILE *out = open_memstream(&buf, &sz); - assert(out != NULL); - emit_results(s, out); - fclose(out); - CHECK(strstr(buf, "W\t1000\t1600\t2\t600\n") != NULL); - /* cc1: cpu delta 70 (pid 100 two samples) + 0 pids column via max(cpu,ram). - Peak RSS 4096, sum_avg 4096, rss_samples 1, pids max(1,1)=1. */ - CHECK(strstr(buf, "N\tcc1\t70\t4096\t4096\t1\t1\n") != NULL); - /* short: single-sample pid 200 → delta == 10; no RAM, so peak/sum/rss=0. 
*/ - CHECK(strstr(buf, "N\tshort\t10\t0\t0\t0\t1\n") != NULL); - free(buf); - state_free(s); -} - -static void test_empty_and_single_epoch(void) -{ - State *s = state_new(); - /* No input at all → window line with zeroes. */ - char *buf = NULL; - size_t sz = 0; - FILE *out = open_memstream(&buf, &sz); - emit_results(s, out); - fclose(out); - CHECK(strstr(buf, "W\t0\t0\t0\t0\n") != NULL); - free(buf); - state_free(s); - - /* Exactly one epoch → median interval stays 0. */ - s = state_new(); - char prc[] = "PRC h 500 d t 600 50 (a) S 1 1\n"; - process_line(prc, s); - buf = NULL; - sz = 0; - out = open_memstream(&buf, &sz); - emit_results(s, out); - fclose(out); - CHECK(strstr(buf, "W\t500\t500\t1\t0\n") != NULL); - free(buf); - state_free(s); -} - -static void test_delta_clamped_to_zero(void) -{ - /* Counter reset: last < first → delta must clamp to 0. */ - State *s = state_new(); - char a[] = "PRC h 100 d t 600 77 (x) S 50 50\n"; - char b[] = "PRC h 700 d t 600 77 (x) S 10 10\n"; - process_line(a, s); - process_line(b, s); - char *buf = NULL; - size_t sz = 0; - FILE *out = open_memstream(&buf, &sz); - emit_results(s, out); - fclose(out); - CHECK(strstr(buf, "N\tx\t0\t") != NULL); - free(buf); - state_free(s); -} - -static void test_hash_collision(void) -{ - /* Force two PIDs into adjacent slots (Knuth hash rarely collides on - small integers, but we sweep a range to exercise the linear-probe - branch). */ - State *s = state_new(); - for (int pid = 1; pid <= 2000; pid++) - { - char line[128]; - snprintf(line, sizeof(line), "PRC h 1000 d t 600 %d (p) S 1 1\n", pid); - process_line(line, s); - snprintf(line, sizeof(line), "PRM h 1000 d t 600 %d (p) S 4096 1 1 0 0\n", pid); - process_line(line, s); - } - state_free(s); -} - -static void test_state_free_null(void) -{ - /* Freeing NULL must be safe. 
*/ - state_free(NULL); -} - -int main(void) -{ - test_copy_name(); - test_tokenize(); - test_process_and_emit(); - test_empty_and_single_epoch(); - test_delta_clamped_to_zero(); - test_hash_collision(); - test_state_free_null(); - if (failures > 0) - { - fprintf(stderr, "%d test failures\n", failures); - return 1; - } - printf("atop_agg tests: OK\n"); - return 0; -} diff --git a/C/compile_commands.json b/C/compile_commands.json deleted file mode 100644 index 10de239..0000000 --- a/C/compile_commands.json +++ /dev/null @@ -1,105 +0,0 @@ -[ - { - "directory": "/home/kuchy/testsAndMisc/C/1dvelocitysimulator", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/1dvelocitysimulator -I/home/kuchy/testsAndMisc/C -c main.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/1dvelocitysimulator/main.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/fps", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/fps -I/home/kuchy/testsAndMisc/C -c main.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/fps/main.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/imageViewer", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/imageViewer -I/home/kuchy/testsAndMisc/C -c main.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/imageViewer/main.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c main.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/main.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c micro_max.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/micro_max.c" - }, - { - "directory": 
"/home/kuchy/testsAndMisc/C/lichess_random_engine", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c movegen.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/movegen.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c perft.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/perft.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c search.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/search.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/misc", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc -I/home/kuchy/testsAndMisc/C -c generatingWordsEndingWIthalka.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/misc/generatingWordsEndingWIthalka.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/misc/randomJPG", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc/randomJPG -I/home/kuchy/testsAndMisc/C -c generate_images.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/misc/randomJPG/generate_images.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/misc/randomJPG", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc/randomJPG -I/home/kuchy/testsAndMisc/C -c generate_jpg.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/misc/randomJPG/generate_jpg.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/misc/split", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc/split -I/home/kuchy/testsAndMisc/C -c main.c 
-o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/misc/split/main.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/opening_learner", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c chess.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/opening_learner/chess.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/opening_learner", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c engine.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/opening_learner/engine.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/opening_learner", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c gui.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/opening_learner/gui.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/opening_learner", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c main.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/opening_learner/main.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/opening_learner", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c mistakes.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/opening_learner/mistakes.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/scrapeWebsite", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/scrapeWebsite -I/home/kuchy/testsAndMisc/C -c scrape.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/scrapeWebsite/scrape.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/tests", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/tests -I/home/kuchy/testsAndMisc/C -c 
generatingPolishLettersOnWindowsTerminal.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/tests/generatingPolishLettersOnWindowsTerminal.c" - }, - { - "directory": "/home/kuchy/testsAndMisc/C/websocketServer", - "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/websocketServer -I/home/kuchy/testsAndMisc/C -c main.c -o /dev/null", - "file": "/home/kuchy/testsAndMisc/C/websocketServer/main.c" - } -] -[ - {\n "directory": "/home/kuchy/testsAndMisc/C/1dvelocitysimulator",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/1dvelocitysimulator -I/home/kuchy/testsAndMisc/C -c 1dvelocitysimulator/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/1dvelocitysimulator/main.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/fps",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/fps -I/home/kuchy/testsAndMisc/C -c fps/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/fps/main.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/imageViewer",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/imageViewer -I/home/kuchy/testsAndMisc/C -c imageViewer/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/imageViewer/main.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c lichess_random_engine/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/main.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c lichess_random_engine/micro_max.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/micro_max.c"\n }, {\n "directory": 
"/home/kuchy/testsAndMisc/C/lichess_random_engine",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c lichess_random_engine/movegen.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/movegen.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c lichess_random_engine/perft.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/perft.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/lichess_random_engine",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/lichess_random_engine -I/home/kuchy/testsAndMisc/C -c lichess_random_engine/search.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/lichess_random_engine/search.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/misc",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc -I/home/kuchy/testsAndMisc/C -c misc/generatingWordsEndingWIthalka.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/misc/generatingWordsEndingWIthalka.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/misc/randomJPG",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc/randomJPG -I/home/kuchy/testsAndMisc/C -c misc/randomJPG/generate_images.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/misc/randomJPG/generate_images.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/misc/randomJPG",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/misc/randomJPG -I/home/kuchy/testsAndMisc/C -c misc/randomJPG/generate_jpg.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/misc/randomJPG/generate_jpg.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/misc/split",\n "command": "clang -std=c11 -Wall -Wextra 
-Wpedantic -I/home/kuchy/testsAndMisc/C/misc/split -I/home/kuchy/testsAndMisc/C -c misc/split/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/misc/split/main.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/opening_learner",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c opening_learner/chess.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/opening_learner/chess.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/opening_learner",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c opening_learner/engine.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/opening_learner/engine.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/opening_learner",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c opening_learner/gui.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/opening_learner/gui.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/opening_learner",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c opening_learner/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/opening_learner/main.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/opening_learner",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/opening_learner -I/home/kuchy/testsAndMisc/C -c opening_learner/mistakes.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/opening_learner/mistakes.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/scrapeWebsite",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/scrapeWebsite -I/home/kuchy/testsAndMisc/C -c scrapeWebsite/scrape.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/scrapeWebsite/scrape.c"\n }, {\n "directory": 
"/home/kuchy/testsAndMisc/C/tests",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/tests -I/home/kuchy/testsAndMisc/C -c tests/generatingPolishLettersOnWindowsTerminal.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/tests/generatingPolishLettersOnWindowsTerminal.c"\n }, {\n "directory": "/home/kuchy/testsAndMisc/C/websocketServer",\n "command": "clang -std=c11 -Wall -Wextra -Wpedantic -I/home/kuchy/testsAndMisc/C/websocketServer -I/home/kuchy/testsAndMisc/C -c websocketServer/main.c -o /dev/null",\n "file": "/home/kuchy/testsAndMisc/C/websocketServer/main.c"\n } -] diff --git a/C/cppcheck.txt b/C/cppcheck.txt deleted file mode 100644 index b80e984..0000000 --- a/C/cppcheck.txt +++ /dev/null @@ -1,40 +0,0 @@ -Checking 1dvelocitysimulator/main.c ... -1/20 files checked 2% done -Checking fps/main.c ... -2/20 files checked 10% done -Checking imageViewer/main.c ... -3/20 files checked 37% done -Checking lichess_random_engine/main.c ... -4/20 files checked 40% done -Checking lichess_random_engine/micro_max.c ... -5/20 files checked 49% done -Checking lichess_random_engine/movegen.c ... -6/20 files checked 60% done -Checking lichess_random_engine/perft.c ... -7/20 files checked 61% done -Checking lichess_random_engine/search.c ... -8/20 files checked 62% done -Checking misc/generatingWordsEndingWIthalka.c ... -9/20 files checked 63% done -Checking misc/randomJPG/generate_images.c ... -10/20 files checked 68% done -Checking misc/randomJPG/generate_jpg.c ... -11/20 files checked 73% done -Checking misc/split/main.c ... -12/20 files checked 74% done -Checking opening_learner/chess.c ... -13/20 files checked 83% done -Checking opening_learner/engine.c ... -14/20 files checked 86% done -Checking opening_learner/gui.c ... -15/20 files checked 90% done -Checking opening_learner/main.c ... -16/20 files checked 93% done -Checking opening_learner/mistakes.c ... -17/20 files checked 95% done -Checking scrapeWebsite/scrape.c ... 
-18/20 files checked 98% done -Checking tests/generatingPolishLettersOnWindowsTerminal.c ... -19/20 files checked 98% done -Checking websocketServer/main.c ... -20/20 files checked 100% done diff --git a/C/flawfinder.txt b/C/flawfinder.txt deleted file mode 100644 index 4876b8e..0000000 --- a/C/flawfinder.txt +++ /dev/null @@ -1,627 +0,0 @@ -Flawfinder version 2.0.19, (C) 2001-2019 David A. Wheeler. -Number of rules (primarily dangerous function names) in C/C++ ruleset: 222 -./1dvelocitysimulator/main.c:16:5: [4] (shell) system: - This causes a new program to execute and is difficult to use safely - (CWE-78). try using a library call that implements the same functionality - if available. -./1dvelocitysimulator/main.c:22:5: [4] (shell) system: - This causes a new program to execute and is difficult to use safely - (CWE-78). try using a library call that implements the same functionality - if available. -./1dvelocitysimulator/main.c:27:5: [4] (shell) system: - This causes a new program to execute and is difficult to use safely - (CWE-78). try using a library call that implements the same functionality - if available. -./lichess_random_engine/movegen.c:35:20: [4] (buffer) strcpy: - Does not check for buffer overflows when copying to destination [MS-banned] - (CWE-120). Consider using snprintf, strcpy_s, or strlcpy (warning: strncpy - easily misused). -./opening_learner/engine.c:21:9: [4] (shell) execlp: - This causes a new program to execute and is difficult to use safely - (CWE-78). try using a library call that implements the same functionality - if available. -./scrapeWebsite/scrape.c:49:8: [4] (race) access: - This usually indicates a security flaw. If an attacker can change anything - along the path between the call to access() and the file's actual use - (e.g., by moving files), the attacker can exploit the race condition - (CWE-362/CWE-367!). Set up the correct permissions (e.g., using setuid()) - and try to open the file directly. 
-./fps/main.c:521:2: [3] (random) srand: - This function is not sufficiently random for security-related functions - such as key and nonce creation (CWE-327). Use a more secure technique for - acquiring random values. -./lichess_random_engine/main.c:112:2: [3] (random) srand: - This function is not sufficiently random for security-related functions - such as key and nonce creation (CWE-327). Use a more secure technique for - acquiring random values. -./lichess_random_engine/micro_max.c:228:52: [3] (random) srand: - This function is not sufficiently random for security-related functions - such as key and nonce creation (CWE-327). Use a more secure technique for - acquiring random values. -./misc/randomJPG/generate_images.c:257:5: [3] (random) srand: - This function is not sufficiently random for security-related functions - such as key and nonce creation (CWE-327). Use a more secure technique for - acquiring random values. -./misc/randomJPG/generate_jpg.c:208:5: [3] (random) srand: - This function is not sufficiently random for security-related functions - such as key and nonce creation (CWE-327). Use a more secure technique for - acquiring random values. -./opening_learner/main.c:49:2: [3] (random) srand: - This function is not sufficiently random for security-related functions - such as key and nonce creation (CWE-327). Use a more secure technique for - acquiring random values. -./fps/main.c:338:3: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:26:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. 
-./imageViewer/main.c:34:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:87:5: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./imageViewer/main.c:416:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:447:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:475:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:553:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:585:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). 
Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:614:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:689:12: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:1137:9: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./imageViewer/main.c:1181:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:1188:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:1200:9: [2] (buffer) strcpy: - Does not check for buffer overflows when copying to destination [MS-banned] - (CWE-120). Consider using snprintf, strcpy_s, or strlcpy (warning: strncpy - easily misused). Risk is low because the source is a constant string. -./imageViewer/main.c:1207:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). 
Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./imageViewer/main.c:1208:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./lichess_random_engine/micro_max.c:15:6: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./lichess_random_engine/micro_max.c:179:6: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./lichess_random_engine/movegen.c:35:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./lichess_random_engine/movegen.c:36:5: [2] (buffer) strcat: - Does not check for buffer overflows when concatenating to destination - [MS-banned] (CWE-120). Consider using strcat_s, strncat, strlcat, or - snprintf (warning: strncat is easily misused). Risk is low because the - source is a constant string. -./lichess_random_engine/perft.c:38:21: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). 
If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./lichess_random_engine/perft.c:46:17: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./lichess_random_engine/perft.c:53:36: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./misc/randomJPG/generate_images.c:106:21: [2] (misc) fopen: - Check when opening files - can an attacker redirect it (via symlinks), - force the opening of special file type (e.g., device files), move things - around to create a race condition, control its ancestors, or change its - contents? (CWE-362). -./misc/randomJPG/generate_images.c:117:21: [2] (misc) fopen: - Check when opening files - can an attacker redirect it (via symlinks), - force the opening of special file type (e.g., device files), move things - around to create a race condition, control its ancestors, or change its - contents? (CWE-362). -./misc/randomJPG/generate_images.c:121:14: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./misc/randomJPG/generate_images.c:124:14: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). 
Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./misc/randomJPG/generate_images.c:163:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./misc/randomJPG/generate_images.c:234:33: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./misc/randomJPG/generate_images.c:235:27: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./misc/randomJPG/generate_images.c:236:33: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./misc/randomJPG/generate_images.c:237:30: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). 
-./misc/randomJPG/generate_images.c:273:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./misc/randomJPG/generate_jpg.c:106:21: [2] (misc) fopen: - Check when opening files - can an attacker redirect it (via symlinks), - force the opening of special file type (e.g., device files), move things - around to create a race condition, control its ancestors, or change its - contents? (CWE-362). -./misc/randomJPG/generate_jpg.c:124:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./misc/randomJPG/generate_jpg.c:186:33: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./misc/randomJPG/generate_jpg.c:187:27: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./misc/randomJPG/generate_jpg.c:188:33: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). 
-./misc/randomJPG/generate_jpg.c:189:30: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./misc/randomJPG/generate_jpg.c:224:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/chess.c:253:33: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/chess.c:270:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/chess.h:11:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/chess.h:48:33: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. 
-./opening_learner/engine.c:36:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:82:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:88:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:90:41: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:92:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:104:31: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). 
-./opening_learner/engine.c:105:66: [2] (integer) atoi: - Unless checked, the resulting number can exceed the expected range - (CWE-190). If source untrusted, check both minimum and maximum, even if the - input had no minus sign (large numbers can roll over into negative number; - consider saving to an unsigned value if that is intended). -./opening_learner/engine.c:106:25: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:124:59: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:126:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.c:128:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/engine.h:11:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. 
-./opening_learner/engine.h:32:59: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/gui.c:73:29: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/gui.h:24:29: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:29:2: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:36:38: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:77:2: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. 
-./opening_learner/main.c:79:2: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:83:2: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:95:4: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:99:4: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:103:6: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./opening_learner/main.c:136:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:155:4: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). 
Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/main.c:164:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/mistakes.c:32:15: [2] (misc) fopen: - Check when opening files - can an attacker redirect it (via symlinks), - force the opening of special file type (e.g., device files), move things - around to create a race condition, control its ancestors, or change its - contents? (CWE-362). -./opening_learner/mistakes.c:42:15: [2] (misc) fopen: - Check when opening files - can an attacker redirect it (via symlinks), - force the opening of special file type (e.g., device files), move things - around to create a race condition, control its ancestors, or change its - contents? (CWE-362). -./opening_learner/mistakes.c:44:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/mistakes.c:44:21: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/mistakes.c:44:41: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. 
-./opening_learner/mistakes.c:44:61: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/mistakes.c:49:13: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./opening_learner/mistakes.c:53:13: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./opening_learner/mistakes.c:57:13: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./opening_learner/mistakes.h:10:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/mistakes.h:11:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./opening_learner/mistakes.h:13:5: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./scrapeWebsite/scrape.c:28:5: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). 
- Make sure destination can always hold the source data. -./scrapeWebsite/scrape.c:56:20: [2] (misc) fopen: - Check when opening files - can an attacker redirect it (via symlinks), - force the opening of special file type (e.g., device files), move things - around to create a race condition, control its ancestors, or change its - contents? (CWE-362). -./websocketServer/main.c:22:22: [2] (buffer) char: - Statically-sized arrays can be improperly restricted, leading to potential - overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use - functions that limit length, or ensure that the size is larger than the - maximum possible length. -./websocketServer/main.c:24:13: [2] (buffer) memcpy: - Does not check for buffer overflows when copying to destination (CWE-120). - Make sure destination can always hold the source data. -./fps/main.c:345:22: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./fps/main.c:346:22: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./fps/main.c:347:22: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:233:27: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:404:27: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). 
-./imageViewer/main.c:453:49: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:455:43: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:476:31: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:477:31: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:493:33: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:494:33: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:592:49: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:594:43: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:615:31: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:616:31: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). 
-./imageViewer/main.c:632:33: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:633:33: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:1182:18: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./imageViewer/main.c:1191:23: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./lichess_random_engine/micro_max.c:163:18: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./lichess_random_engine/micro_max.c:241:11: [1] (buffer) strncpy: - Easily used incorrectly; doesn't always \0-terminate or check for invalid - pointers [MS-banned] (CWE-120). -./lichess_random_engine/movegen.c:428:18: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./lichess_random_engine/movegen.c:439:25: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./opening_learner/chess.c:261:15: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./opening_learner/engine.c:38:9: [1] (obsolete) usleep: - This C routine is considered obsolete (as opposed to the shell command by - the same name). 
The interaction of this function with SIGALRM and other - timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is - unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead. -./opening_learner/engine.c:39:21: [1] (buffer) read: - Check buffer boundaries if used in a loop including recursive loops - (CWE-120, CWE-20). -./opening_learner/engine.c:49:9: [1] (obsolete) usleep: - This C routine is considered obsolete (as opposed to the shell command by - the same name). The interaction of this function with SIGALRM and other - timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is - unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead. -./opening_learner/engine.c:50:21: [1] (buffer) read: - Check buffer boundaries if used in a loop including recursive loops - (CWE-120, CWE-20). -./opening_learner/engine.c:72:18: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). -./opening_learner/engine.c:94:9: [1] (obsolete) usleep: - This C routine is considered obsolete (as opposed to the shell command by - the same name). The interaction of this function with SIGALRM and other - timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is - unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead. -./opening_learner/engine.c:95:17: [1] (buffer) read: - Check buffer boundaries if used in a loop including recursive loops - (CWE-120, CWE-20). -./opening_learner/engine.c:107:25: [1] (buffer) sscanf: - It's unclear if the %s limit in the format string is small enough - (CWE-120). Check that the limit is sufficiently small, or use a different - input function. -./opening_learner/engine.c:130:9: [1] (obsolete) usleep: - This C routine is considered obsolete (as opposed to the shell command by - the same name). 
The interaction of this function with SIGALRM and other - timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is - unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead. -./opening_learner/engine.c:131:17: [1] (buffer) read: - Check buffer boundaries if used in a loop including recursive loops - (CWE-120, CWE-20). -./opening_learner/engine.c:136:52: [1] (buffer) sscanf: - It's unclear if the %s limit in the format string is small enough - (CWE-120). Check that the limit is sufficiently small, or use a different - input function. -./opening_learner/main.c:23:15: [1] (buffer) strncat: - Easily used incorrectly (e.g., incorrectly computing the correct maximum - size to add) [MS-banned] (CWE-120). Consider strcat_s, strlcat, snprintf, - or automatically resizing strings. Risk is low because the source is a - constant character. -./opening_learner/main.c:24:2: [1] (buffer) strncat: - Easily used incorrectly (e.g., incorrectly computing the correct maximum - size to add) [MS-banned] (CWE-120). Consider strcat_s, strlcat, snprintf, - or automatically resizing strings. -./opening_learner/main.c:36:73: [1] (buffer) strncpy: - Easily used incorrectly; doesn't always \0-terminate or check for invalid - pointers [MS-banned] (CWE-120). -./opening_learner/main.c:100:30: [1] (buffer) strncpy: - Easily used incorrectly; doesn't always \0-terminate or check for invalid - pointers [MS-banned] (CWE-120). -./opening_learner/main.c:140:5: [1] (buffer) strncpy: - Easily used incorrectly; doesn't always \0-terminate or check for invalid - pointers [MS-banned] (CWE-120). -./websocketServer/main.c:23:30: [1] (buffer) strlen: - Does not handle strings that are not \0-terminated; if given one it may - perform an over-read (it could cause a crash if unprotected) (CWE-126). 
- -ANALYSIS SUMMARY: - -Hits = 140 -Lines analyzed = 5027 in approximately 0.26 seconds (19578 lines/second) -Physical Source Lines of Code (SLOC) = 4111 -Hits@level = [0] 208 [1] 41 [2] 87 [3] 6 [4] 6 [5] 0 -Hits@level+ = [0+] 348 [1+] 140 [2+] 99 [3+] 12 [4+] 6 [5+] 0 -Hits/KSLOC@level+ = [0+] 84.6509 [1+] 34.055 [2+] 24.0817 [3+] 2.919 [4+] 1.4595 [5+] 0 -Dot directories skipped = 1 (--followdotdir overrides) -Minimum risk level = 1 - -Not every hit is necessarily a security vulnerability. -You can inhibit a report by adding a comment in this form: -// flawfinder: ignore -Make *sure* it's a false positive! -You can use the option --neverignore to show these. - -There may be other security vulnerabilities; review your code! -See 'Secure Programming HOWTO' -(https://dwheeler.com/secure-programs) for more information. diff --git a/C/lint_all.sh b/C/lint_all.sh deleted file mode 100755 index bf6a770..0000000 --- a/C/lint_all.sh +++ /dev/null @@ -1,347 +0,0 @@ -#!/usr/bin/env bash - -# Lint all C code in C/ and its subfolders with aggressive rules -# - Installs required tools if missing (clang-tidy, clang-format, cppcheck, flawfinder) -# - Uses compile_commands.json if present for clang-tidy; otherwise uses sane defaults -# - Checks formatting with clang-format --dry-run --Werror -# - Runs cppcheck with exhaustive rules -# - Runs flawfinder for security issues - -set -u - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -info() { echo -e "${BLUE}==>${NC} $*"; } -ok() { echo -e "${GREEN}✓${NC} $*"; } -warn() { echo -e "${YELLOW}⚠${NC} $*"; } -err() { echo -e "${RED}✗${NC} $*"; } - -ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) -C_DIR="${ROOT_DIR}/C" -AUTOFIX=${LINT_AUTOFIX:-1} - -if [[ ! 
-d "${C_DIR}" ]]; then - err "C directory not found at ${C_DIR}" - exit 1 -fi - -ISSUES=0 -MISSING=() -C_FILES=() -C_SOURCES=() - -need_cmd() { - command -v "$1" >/dev/null 2>&1 || MISSING+=("$1") -} - -detect_pkg_manager() { - if command -v pacman >/dev/null 2>&1; then echo pacman; return; fi - if command -v apt-get >/dev/null 2>&1; then echo apt; return; fi - if command -v apt >/dev/null 2>&1; then echo apt; return; fi - if command -v dnf >/dev/null 2>&1; then echo dnf; return; fi - if command -v zypper >/dev/null 2>&1; then echo zypper; return; fi - if command -v apk >/dev/null 2>&1; then echo apk; return; fi - echo none -} - -install_tools() { - info "Checking required tools..." - need_cmd clang-tidy - need_cmd clang-format - need_cmd cppcheck - need_cmd flawfinder - - if [[ ${#MISSING[@]} -eq 0 ]]; then - ok "All tools present: clang-tidy, clang-format, cppcheck, flawfinder" - return 0 - fi - - warn "Missing tools: ${MISSING[*]} — attempting to install with sudo" - local pm - pm=$(detect_pkg_manager) - case "$pm" in - pacman) - sudo pacman -S --needed --noconfirm clang clang-tools-extra clang-format cppcheck flawfinder || true - ;; - apt|apt-get) - sudo "$pm" update -y || true - # clang-tidy and clang-format may be versioned; prefer unversioned meta pkgs - sudo "$pm" install -y clang-tidy clang-format cppcheck flawfinder || true - ;; - dnf) - sudo dnf install -y clang-tools-extra clang cppcheck flawfinder || true - ;; - zypper) - sudo zypper --non-interactive install clang-tools clang-tools-extra cppcheck flawfinder || true - ;; - apk) - sudo apk add clang-extra-tools clang cppcheck flawfinder || true - ;; - *) - warn "Unsupported package manager. 
Please install: clang-tidy clang-format cppcheck flawfinder" - ;; - esac - - # Re-check after attempted install - MISSING=() - need_cmd clang-tidy - need_cmd clang-format - need_cmd cppcheck - need_cmd flawfinder - if [[ ${#MISSING[@]} -ne 0 ]]; then - warn "Still missing: ${MISSING[*]} — continuing, but related steps may be skipped" - else - ok "Tools installed" - fi -} - -ensure_configs() { - # Provide default aggressive configs if missing - if [[ ! -f "${C_DIR}/.clang-tidy" ]]; then - warn ".clang-tidy not found in C/. Creating a default aggressive config." - cat >"${C_DIR}/.clang-tidy" <<'YAML' -Checks: > - clang-analyzer-*,bugprone-*,cert-*,concurrency-*,hicpp-*,misc-*,performance-*, - portability-*,readability-*,clang-diagnostic-*,cppcoreguidelines-* -WarningsAsErrors: '*' -HeaderFilterRegex: '.*' -AnalyzeTemporaryDtors: true -FormatStyle: none -YAML - fi - - if [[ ! -f "${C_DIR}/.clang-format" ]]; then - warn ".clang-format not found in C/. Creating a default style." - cat >"${C_DIR}/.clang-format" <<'YAML' -BasedOnStyle: LLVM -IndentWidth: 4 -TabWidth: 4 -UseTab: Never -ColumnLimit: 100 -SortIncludes: true -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: true -AllowShortIfStatementsOnASingleLine: false -BreakBeforeBraces: Allman -Standard: C23 -YAML - fi -} - -collect_files() { - # shellcheck disable=SC2207 - C_FILES=($(find "${C_DIR}" -type f \( -name '*.c' -o -name '*.h' -o -name '*.inc' \) \ - -not -path '*/.*' -not -path '*/build/*' -not -path '*/dist/*' -not -path '*/out/*' \ - -not -path '*/bin/*' -not -path '*/obj/*')) - if [[ ${#C_FILES[@]} -eq 0 ]]; then - warn "No C files found under ${C_DIR}" - else - ok "Found ${#C_FILES[@]} C-related files to check" - fi - mapfile -t C_SOURCES < <(find "${C_DIR}" -type f -name '*.c' \ - -not -path '*/.*' -not -path '*/build/*' -not -path '*/dist/*' -not -path '*/out/*' \ - -not -path '*/bin/*' -not -path '*/obj/*') -} - -apply_clang_format_fix() { - if ! 
command -v clang-format >/dev/null 2>&1; then - warn "clang-format unavailable; skipping auto-format" - return - fi - if [[ ${#C_FILES[@]} -eq 0 ]]; then - return - fi - info "Applying clang-format -i to source files" - local formatted=0 - for f in "${C_FILES[@]}"; do - if clang-format -i "$f" 2>/dev/null; then - formatted=$((formatted+1)) - fi - done - ok "clang-format applied to ${formatted} file(s)" -} - -apply_clang_tidy_fix() { - if ! command -v clang-tidy >/dev/null 2>&1; then - warn "clang-tidy unavailable; skipping auto-fix" - return - fi - if [[ ${#C_SOURCES[@]} -eq 0 ]]; then - return - fi - local db="${C_DIR}/compile_commands.json" - local used_db="no" - if [[ -f "$db" ]] && head -n 1 "$db" | grep -q '\['; then - used_db="yes" - fi - info "Applying clang-tidy --fix to C sources" - local failures=0 - for f in "${C_SOURCES[@]}"; do - local rel - rel=$(realpath --relative-to="${ROOT_DIR}" "$f" 2>/dev/null || echo "$f") - printf ' • %s\n' "$rel" - if [[ "$used_db" == "yes" ]]; then - if ! clang-tidy "$f" -p "${C_DIR}" --fix --format-style=file --quiet >/dev/null 2>&1; then - failures=$((failures+1)) - fi - else - if ! clang-tidy "$f" --fix --format-style=file --quiet -- -std=c2x -I"$(dirname "$f")" -I"${C_DIR}" >/dev/null 2>&1; then - failures=$((failures+1)) - fi - fi - done - if [[ $failures -gt 0 ]]; then - warn "clang-tidy auto-fix encountered $failures issue(s); manual review may be required" - else - ok "clang-tidy auto-fix pass completed" - fi -} - -apply_autofix() { - if [[ "$AUTOFIX" == "0" ]]; then - info "Automatic fixes disabled (LINT_AUTOFIX=0)" - return - fi - info "Automatic fixes enabled (LINT_AUTOFIX=${AUTOFIX})" - apply_clang_format_fix - apply_clang_tidy_fix - # Refresh file lists in case new files were introduced by fixes - collect_files -} - -run_clang_format() { - if ! 
command -v clang-format >/dev/null 2>&1; then - warn "clang-format unavailable; skipping format check" - return - fi - info "Checking formatting with clang-format (--dry-run --Werror)" - local bad=0 - for f in "${C_FILES[@]}"; do - if ! clang-format --dry-run --Werror "$f" >/dev/null 2>&1; then - echo "format issue: $f" - bad=$((bad+1)) - fi - done - if [[ $bad -gt 0 ]]; then - warn "clang-format found $bad files needing formatting" - ISSUES=$((ISSUES+bad)) - else - ok "Formatting OK" - fi -} - -run_cppcheck() { - if ! command -v cppcheck >/dev/null 2>&1; then - warn "cppcheck unavailable; skipping" - return - fi - info "Running cppcheck (aggressive, recursive)" - # Use a temp report file to avoid noisy exit codes stopping script - local report - report=$(mktemp) - local opts=(--enable=all --inconclusive --std=c23 --check-level=exhaustive --force \ - --quiet --error-exitcode=2 --inline-suppr --suppress=missingIncludeSystem \ - --library=posix) - # Exclude common non-source dirs - opts+=(--exclude=build --exclude=dist --exclude=out --exclude=.git --exclude=bin --exclude=obj) - if ! cppcheck "${opts[@]}" "${C_DIR}" 2>"$report"; then - warn "cppcheck reported issues (see summary below)" - ISSUES=$((ISSUES+1)) - else - ok "cppcheck passed" - fi - if [[ -s "$report" ]]; then - echo - echo "cppcheck output:" && sed -e 's/^/ /' "$report" - fi - rm -f "$report" -} - -run_clang_tidy() { - if ! 
command -v clang-tidy >/dev/null 2>&1; then - warn "clang-tidy unavailable; skipping" - return - fi - info "Running clang-tidy on .c files" - local db="${C_DIR}/compile_commands.json" - local used_db="no" - if [[ ${#C_SOURCES[@]} -eq 0 ]]; then - warn "No .c files for clang-tidy" - return - fi - if [[ -f "$db" ]]; then - # Basic validation: ensure JSON array starts with [ and includes "directory" - if head -n 1 "$db" | grep -q '\['; then - used_db="yes" - else - warn "compile_commands.json seems malformed; ignoring" - fi - fi - local failures=0 - for f in "${C_SOURCES[@]}"; do - if [[ "$used_db" == "yes" ]]; then - clang-tidy "$f" -p "${C_DIR}" --quiet || failures=$((failures+1)) - else - # Fallback args: try C23 and include local dir - clang-tidy "$f" --quiet -- -std=c2x -I"$(dirname "$f")" -I"${C_DIR}" || failures=$((failures+1)) - fi - done - if [[ $failures -gt 0 ]]; then - warn "clang-tidy found issues in $failures file(s)" - ISSUES=$((ISSUES+failures)) - else - ok "clang-tidy passed" - fi -} - -run_flawfinder() { - if ! command -v flawfinder >/dev/null 2>&1; then - warn "flawfinder unavailable; skipping" - return - fi - info "Running flawfinder (security-focused scan)" - local report - report=$(mktemp) - if ! 
flawfinder --quiet --columns --minlevel=1 --falsepositive "${C_DIR}" >"$report" 2>/dev/null; then - warn "flawfinder reported issues" - ISSUES=$((ISSUES+1)) - else - ok "flawfinder completed" - fi - if [[ -s "$report" ]]; then - echo - echo "flawfinder notable findings:" && head -n 200 "$report" | sed -e 's/^/ /' - fi - rm -f "$report" -} - -summary_exit() { - echo - if [[ $ISSUES -gt 0 ]]; then - err "Lint completed with $ISSUES issue(s) detected" - echo "Tip: run 'clang-format -i' to fix formatting; many clang-tidy checks support '--fix'" - exit 1 - else - ok "All checks passed with no issues" - fi -} - -main() { - echo -e "${BLUE}C folder – aggressive lint suite${NC}" - echo - install_tools - ensure_configs - collect_files - apply_autofix - run_clang_format - run_cppcheck - run_clang_tidy - run_flawfinder - summary_exit -} - -main "$@" diff --git a/README.md b/README.md index f5074cc..cd55035 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # testsAndMisc -A collection of personal projects, scripts, and experiments — from a GPS-based phone focus tool to C/C++ demos, with CI, linting, and pre-commit hooks across the board. +A collection of personal projects, scripts, and experiments — from a GPS-based phone focus tool to Linux/Arch automation, with CI, linting, and pre-commit hooks across the board. ## Highlights @@ -14,21 +14,26 @@ Automated Arch Linux setup: fresh-install scripts, i3 window manager config, LaT ### [Scripts](scripts/) -Utility scripts for development workflows — C/C++ build file validation, secret detection, and custom makepkg helpers. +Utility scripts for development workflows — build file validation, secret detection, and custom makepkg helpers. 
-## Other Projects +## Repository Layout -| Directory | Description | -| ------------- | ------------------------ | -| `Bash/` | FFmpeg build scripts | -| `C/` | Small native helpers | -| `python_pkg/` | Python package structure | +| Path | Description | +| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `python_pkg/` | Python packages (each maintained subpackage lives here) | +| `linux_configuration/` | Arch Linux setup, i3 config, system maintenance scripts | +| `phone_focus_mode/` | GPS-based Android focus enforcer | +| `scripts/` | Workspace-level helper scripts and pre-commit hooks | +| `docs/` | Reference docs and historical reports | +| `third_party/` | Vendored upstream skills/agents | +| `meta/` | Repo-wide tooling: `pyproject.toml`, `requirements.txt`, `.pre-commit-config.yaml`, `run.sh`, `lint_python.sh`, `.fvmrc`. Symlinked into the repo root so tools that auto-discover from root keep working. | Archived / unmaintained projects live in the sibling repository [`testsAndMisc-archive`](https://github.com/kuhyx/testsAndMisc-archive). 
## Tooling -- **Python linting**: [Ruff](https://docs.astral.sh/ruff/) with all rules enabled (see `pyproject.toml`) +- **Python linting**: [Ruff](https://docs.astral.sh/ruff/) with all rules enabled (see `meta/pyproject.toml`) +- **Dependencies**: `pip install -r meta/requirements.txt` (combined runtime + dev) - **CI**: GitHub Actions — lint, build, and test on push - **Testing**: pytest (Python), custom shell-based test harness for scripts diff --git a/docs/superpowers/evidence/cleanup-meta-folder-2026-05.json b/docs/superpowers/evidence/cleanup-meta-folder-2026-05.json new file mode 100644 index 0000000..6922b06 --- /dev/null +++ b/docs/superpowers/evidence/cleanup-meta-folder-2026-05.json @@ -0,0 +1,50 @@ +{ + "intent": "Consolidate repo-wide tooling configs into a meta/ folder, drop unused C/ and a few stale python_pkg subpackages, combine requirements files, and remove setup.sh + .binary-allowlist. Root-level symlinks keep tool auto-discovery (pyproject.toml, .pre-commit-config.yaml, requirements.txt, run.sh, lint_python.sh, .fvmrc) working unchanged.", + "scope": [ + "Delete: setup.sh, .binary-allowlist, C/, python_pkg/{split,pdfCentered,geo_data}, scripts/check_c_cpp_build_files.sh", + "Move into meta/: run.sh, lint_python.sh, pyproject.toml, .pre-commit-config.yaml, .fvmrc", + "Combine requirements.txt + requirements-dev.txt into meta/requirements.txt", + "Add root symlinks for each moved file so existing tooling keeps resolving from repo root", + "Update README.md and meta/.pre-commit-config.yaml to drop archived path references", + "Add .secret-patterns to .gitignore and untrack it (its content is sensitive home-coordinate regex)" + ], + "changes": [ + "git rm of removed paths and stale combined requirements files", + "git mv of five root configs into meta/ preserving git history", + "Created meta/requirements.txt as the single source of truth (alphabetically sorted, pre-commit-friendly)", + "Created six root symlinks pointing into meta/", + "Removed 
clang-format/cppcheck/flawfinder/check-c-cpp-build-files hooks (no C/C++ code remains)", + "Edited check-json, check_polling, codespell, gitleaks-style excludes to drop C/ and CPP/ references", + "Rewrote README.md repository layout section to mention meta/ and drop C/ + Bash/ rows" + ], + "verification": [ + { + "command": "ls -la run.sh pyproject.toml .pre-commit-config.yaml requirements.txt lint_python.sh .fvmrc", + "result": "pass", + "evidence": "All resolve as symlinks into meta/ with correct targets" + }, + { + "command": "python -c 'import tomllib; tomllib.load(open(\"pyproject.toml\",\"rb\"))'", + "result": "pass", + "evidence": "tomllib parses pyproject.toml through the root symlink" + }, + { + "command": "pre-commit validate-config", + "result": "pass", + "evidence": "Config validates after path edits" + }, + { + "command": "pre-commit run --hook-stage pre-commit", + "result": "pending", + "evidence": "Final run executed after adding this artifact" + } + ], + "risks": [ + "Anything that hard-coded the legacy path requirements-dev.txt or C/ breaks. No internal callers found via git grep.", + "If a tool resolves symlinks (rare) it might log the meta/ path instead of root; harmless." + ], + "rollback": [ + "git revert the consolidation commit; symlinks and moved files restore atomically because git tracks them.", + "Run pre-commit run --all-files after revert to confirm legacy layout still validates." 
+ ] +} diff --git a/lint_python.sh b/lint_python.sh deleted file mode 100755 index 7de8f50..0000000 --- a/lint_python.sh +++ /dev/null @@ -1,346 +0,0 @@ -#!/usr/bin/env bash -# ============================================================================== -# Python Linting Script - Run ALL linters with aggressive settings -# ============================================================================== -# Usage: -# ./lint_python.sh # Lint all Python files -# ./lint_python.sh --fix # Lint and auto-fix where possible -# ./lint_python.sh # Lint specific file -# ./lint_python.sh --quick # Quick lint (ruff + mypy only) -# ./lint_python.sh --report # Generate detailed reports -# ============================================================================== - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -MAGENTA='\033[0;35m' -CYAN='\033[0;36m' -NC='\033[0m' # No Color -BOLD='\033[1m' - -# Configuration -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="${SCRIPT_DIR}" -PYTHON_PATHS=( - "PYTHON" - "articles" - "poker-modifier-app" - "tests" -) -EXCLUDE_PATHS=( - ".venv" - "__pycache__" - ".git" - "Bash/ffmpeg-build" - ".pytest_cache" - ".ruff_cache" - ".mypy_cache" -) - -# Build exclude pattern for find -EXCLUDE_PATTERN="" -for path in "${EXCLUDE_PATHS[@]}"; do - EXCLUDE_PATTERN="${EXCLUDE_PATTERN} -path '*/${path}/*' -prune -o" -done - -# Parse arguments -FIX_MODE=false -QUICK_MODE=false -REPORT_MODE=false -TARGET_FILES="" - -while [[ $# -gt 0 ]]; do - case $1 in - --fix|-f) - FIX_MODE=true - shift - ;; - --quick|-q) - QUICK_MODE=true - shift - ;; - --report|-r) - REPORT_MODE=true - shift - ;; - --help|-h) - echo "Usage: $0 [OPTIONS] [FILES...]" - echo "" - echo "Options:" - echo " --fix, -f Auto-fix issues where possible" - echo " --quick, -q Quick mode (ruff + mypy only)" - echo " --report, -r Generate detailed reports to ./lint-reports/" - echo " --help, -h Show this help 
message" - echo "" - echo "Examples:" - echo " $0 # Lint all Python files" - echo " $0 --fix # Lint and auto-fix" - echo " $0 PYTHON/ # Lint specific directory" - echo " $0 --quick --fix # Quick lint with auto-fix" - exit 0 - ;; - *) - TARGET_FILES="${TARGET_FILES} $1" - shift - ;; - esac -done - -# If no target specified, use default paths -if [[ -z "${TARGET_FILES}" ]]; then - TARGET_FILES="${PYTHON_PATHS[*]}" -fi - -# Create reports directory if needed -if [[ "${REPORT_MODE}" == true ]]; then - mkdir -p "${PROJECT_ROOT}/lint-reports" -fi - -# Track overall status -OVERALL_STATUS=0 -FAILED_TOOLS=() - -# ============================================================================== -# Helper functions -# ============================================================================== - -print_header() { - echo "" - echo -e "${BOLD}${BLUE}══════════════════════════════════════════════════════════════${NC}" - echo -e "${BOLD}${BLUE} $1${NC}" - echo -e "${BOLD}${BLUE}══════════════════════════════════════════════════════════════${NC}" -} - -print_subheader() { - echo "" - echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}" - echo -e "${CYAN} $1${NC}" - echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}" -} - -print_success() { - echo -e "${GREEN}✓${NC} $1" -} - -print_warning() { - echo -e "${YELLOW}⚠${NC} $1" -} - -print_error() { - echo -e "${RED}✗${NC} $1" -} - -print_info() { - echo -e "${BLUE}ℹ${NC} $1" -} - -run_tool() { - local tool_name="$1" - local tool_cmd="$2" - local report_file="${PROJECT_ROOT}/lint-reports/${tool_name}.txt" - - print_subheader "Running ${tool_name}..." 
- - if [[ "${REPORT_MODE}" == true ]]; then - if eval "${tool_cmd}" 2>&1 | tee "${report_file}"; then - print_success "${tool_name} passed" - return 0 - else - print_error "${tool_name} found issues (see ${report_file})" - FAILED_TOOLS+=("${tool_name}") - return 1 - fi - else - if eval "${tool_cmd}"; then - print_success "${tool_name} passed" - return 0 - else - print_error "${tool_name} found issues" - FAILED_TOOLS+=("${tool_name}") - return 1 - fi - fi -} - -check_tool() { - if command -v "$1" &> /dev/null; then - return 0 - else - print_warning "$1 not found, skipping..." - return 1 - fi -} - -# ============================================================================== -# Main linting process -# ============================================================================== - -print_header "Python Linting Suite - Aggressive Mode" -echo "" -print_info "Target: ${TARGET_FILES}" -print_info "Fix mode: ${FIX_MODE}" -print_info "Quick mode: ${QUICK_MODE}" -print_info "Report mode: ${REPORT_MODE}" - -cd "${PROJECT_ROOT}" - -# ============================================================================== -# RUFF - Primary linter and formatter -# ============================================================================== -if check_tool ruff; then - if [[ "${FIX_MODE}" == true ]]; then - run_tool "ruff-lint" "ruff check --fix --show-fixes ${TARGET_FILES}" || OVERALL_STATUS=1 - run_tool "ruff-format" "ruff format ${TARGET_FILES}" || OVERALL_STATUS=1 - else - run_tool "ruff-lint" "ruff check ${TARGET_FILES}" || OVERALL_STATUS=1 - run_tool "ruff-format-check" "ruff format --check ${TARGET_FILES}" || OVERALL_STATUS=1 - fi -fi - -# ============================================================================== -# MYPY - Static type checking -# ============================================================================== -if check_tool mypy; then - run_tool "mypy" "mypy --strict --ignore-missing-imports ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# Quick mode exits here 
-if [[ "${QUICK_MODE}" == true ]]; then - print_header "Quick Lint Complete" - if [[ ${#FAILED_TOOLS[@]} -gt 0 ]]; then - print_error "Failed tools: ${FAILED_TOOLS[*]}" - exit 1 - else - print_success "All quick checks passed!" - exit 0 - fi -fi - -# ============================================================================== -# PYLINT - Comprehensive linting -# ============================================================================== -if check_tool pylint; then - run_tool "pylint" "pylint --rcfile=pyproject.toml --jobs=0 --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# BANDIT - Security linting -# ============================================================================== -if check_tool bandit; then - run_tool "bandit" "bandit -c pyproject.toml -r ${TARGET_FILES} --severity-level low --confidence-level low" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# VULTURE - Dead code detection -# ============================================================================== -if check_tool vulture; then - run_tool "vulture" "vulture --min-confidence 80 ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# FLAKE8 - Traditional linter -# ============================================================================== -if check_tool flake8; then - run_tool "flake8" "flake8 --max-line-length=88 --extend-ignore=E203,W503 --max-complexity=10 ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# PYCODESTYLE - PEP 8 style checker -# ============================================================================== -if check_tool pycodestyle; then - run_tool "pycodestyle" "pycodestyle --max-line-length=88 --ignore=E203,W503 ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# 
============================================================================== -# PYDOCSTYLE - Docstring style checker -# ============================================================================== -if check_tool pydocstyle; then - run_tool "pydocstyle" "pydocstyle --convention=google ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# RADON - Complexity metrics -# ============================================================================== -if check_tool radon; then - print_subheader "Running radon (complexity analysis)..." - echo "" - echo -e "${MAGENTA}Cyclomatic Complexity:${NC}" - radon cc -a -s ${TARGET_FILES} || true - echo "" - echo -e "${MAGENTA}Maintainability Index:${NC}" - radon mi -s ${TARGET_FILES} || true - - if [[ "${REPORT_MODE}" == true ]]; then - radon cc -a -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-cc.txt" 2>&1 || true - radon mi -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-mi.txt" 2>&1 || true - fi -fi - -# ============================================================================== -# INTERROGATE - Docstring coverage -# ============================================================================== -if check_tool interrogate; then - run_tool "interrogate" "interrogate -v --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# PYRIGHT - Microsoft's type checker (optional, very strict) -# ============================================================================== -if check_tool pyright; then - run_tool "pyright" "pyright ${TARGET_FILES}" || OVERALL_STATUS=1 -fi - -# ============================================================================== -# AUTOFLAKE - Unused imports/variables (fix mode only) -# ============================================================================== -if [[ "${FIX_MODE}" == true ]] && check_tool autoflake; then - 
print_subheader "Running autoflake (removing unused imports)..." - find ${TARGET_FILES} -name "*.py" -type f -exec autoflake --in-place --remove-all-unused-imports --remove-unused-variables {} \; - print_success "autoflake completed" -fi - -# ============================================================================== -# PYUPGRADE - Upgrade Python syntax (fix mode only) -# ============================================================================== -if [[ "${FIX_MODE}" == true ]] && check_tool pyupgrade; then - print_subheader "Running pyupgrade (upgrading syntax to Python 3.10+)..." - find ${TARGET_FILES} -name "*.py" -type f -exec pyupgrade --py310-plus {} \; - print_success "pyupgrade completed" -fi - -# ============================================================================== -# CODESPELL - Spell checking -# ============================================================================== -if check_tool codespell; then - if [[ "${FIX_MODE}" == true ]]; then - run_tool "codespell" "codespell -w --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1 - else - run_tool "codespell" "codespell --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1 - fi -fi - -# ============================================================================== -# Summary -# ============================================================================== -print_header "Linting Summary" -echo "" - -if [[ ${OVERALL_STATUS} -ne 0 ]]; then - print_error "The following tools reported issues:" - for tool in "${FAILED_TOOLS[@]}"; do - echo " - ${tool}" - done - echo "" - if [[ "${REPORT_MODE}" == true ]]; then - print_info "Detailed reports saved to: ${PROJECT_ROOT}/lint-reports/" - fi - print_info "Run with --fix to auto-fix issues where possible" - exit ${OVERALL_STATUS} -else - print_success "All linting checks passed!" 
- exit 0 -fi diff --git a/lint_python.sh b/lint_python.sh new file mode 120000 index 0000000..d950ff5 --- /dev/null +++ b/lint_python.sh @@ -0,0 +1 @@ +meta/lint_python.sh \ No newline at end of file diff --git a/meta/.fvmrc b/meta/.fvmrc new file mode 100644 index 0000000..5a4e0c2 --- /dev/null +++ b/meta/.fvmrc @@ -0,0 +1,3 @@ +{ + "flutter": "stable" +} diff --git a/meta/.pre-commit-config.yaml b/meta/.pre-commit-config.yaml new file mode 100644 index 0000000..73a9e1c --- /dev/null +++ b/meta/.pre-commit-config.yaml @@ -0,0 +1,391 @@ +# ============================================================================== +# Pre-commit Configuration - Multi-language Linting & Formatting +# ============================================================================== +# Install: pre-commit install && pre-commit install --hook-type pre-push +# Fast lint: pre-commit run --all-files (linters only, ~10 s) +# Full suite: pre-commit run --all-files --hook-stage pre-push (+ tests) +# Update hooks: pre-commit autoupdate +# ============================================================================== + +# Global settings +default_language_version: + python: python3 + +# Fail fast on first error (set to false to see all errors) +fail_fast: false + +# Configuration +ci: + autofix_commit_msg: "style: auto-fix by pre-commit hooks" + autoupdate_commit_msg: "chore: update pre-commit hooks" + +repos: + # =========================================================================== + # GENERAL HOOKS - File formatting and validation + # =========================================================================== + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: check-yaml + args: [--unsafe] + - id: check-json + # Exclude JSONC files (VS Code configs, TypeScript configs) + exclude: ^(\.vscode/|.*/\.vscode/|.*tsconfig.*\.json) + - id: check-toml + - id: check-xml 
+ - id: check-added-large-files + args: [--maxkb=2000] + - id: check-merge-conflict + - id: check-case-conflict + - id: check-symlinks + - id: check-executables-have-shebangs + - id: check-shebang-scripts-are-executable + - id: detect-private-key + - id: debug-statements + - id: name-tests-test + args: [--pytest-test-first] + - id: check-ast + - id: check-builtin-literals + - id: check-docstring-first + - id: fix-byte-order-marker + - id: mixed-line-ending + args: [--fix=lf] + - id: requirements-txt-fixer + + # =========================================================================== + # BINARY BLOCKER - Prevent binary/image files from being committed + # =========================================================================== + - repo: local + hooks: + - id: no-binaries + name: Block binary/image files + entry: scripts/check_no_binaries.sh + language: script + always_run: false + - id: ai-evidence-contract + name: Require AI evidence artifacts for code changes + entry: scripts/check_ai_evidence.sh + language: script + pass_filenames: false + always_run: true + - id: ai-multifile-contract + name: Require workflow contract for multi-file code changes + entry: scripts/check_agent_contract.sh + language: script + pass_filenames: false + always_run: true + - id: append-only-sessions + name: Enforce append-only session logs + entry: scripts/check_append_only_sessions.sh + language: script + pass_filenames: false + always_run: true + + # =========================================================================== + # POLLING SCRIPT LINTER - Detect fork-storm anti-patterns in shell scripts + # =========================================================================== + - repo: local + hooks: + - id: no-polling-antipatterns + name: Block polling script anti-patterns + entry: scripts/check_polling_antipatterns.sh + language: script + types: [shell] + exclude: ^(\.git/|phone_focus_mode/lib/tests/|tests/) + + # 
=========================================================================== + # NOQA BLOCKER - Zero tolerance for noqa/type:ignore suppression comments + # =========================================================================== + - repo: local + hooks: + - id: no-noqa + name: Block noqa comments + entry: '(?i)#\s*(noqa|type:\s*ignore)' + language: pygrep + types: [python] + - id: no-ruff-noqa + name: Block ruff noqa file-level comments + entry: '(?i)#\s*ruff:\s*noqa' + language: pygrep + types: [python] + + # =========================================================================== + # RUFF - Fast Python linter and formatter (replaces black, isort, flake8, etc.) + # =========================================================================== + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.2 + hooks: + # Linter - run first to catch issues + - id: ruff + args: + - --fix + - --unsafe-fixes + - --exit-non-zero-on-fix + - --show-fixes + types_or: [python, pyi] + # Formatter - run after linting + - id: ruff-format + types_or: [python, pyi] + + # =========================================================================== + # MYPY - Static type checking (runs on push only for speed) + # =========================================================================== + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy + stages: [pre-push] + args: + - --ignore-missing-imports + - --no-error-summary + - --disable-error-code=no-untyped-def + - --disable-error-code=no-untyped-call + - --disable-error-code=var-annotated + - --disable-error-code=no-any-unimported + - --disable-error-code=type-arg + - --disable-error-code=no-any-return + - --disable-error-code=misc + - --disable-error-code=unused-ignore + - --disable-error-code=unreachable + - --disable-error-code=assignment + - --disable-error-code=no-redef + - --disable-error-code=attr-defined + - --disable-error-code=arg-type + - --disable-error-code=union-attr + - 
--disable-error-code=call-overload + - --disable-error-code=return-value + - --disable-error-code=redundant-cast + - --disable-error-code=empty-body + - --disable-error-code=list-item + exclude: >- + (?x)^( + Bash/.*| + \.venv/.*| + linux_configuration/scripts/misc/testsAndMisc-bash/tools/.* + )$ + additional_dependencies: + - types-requests + - types-PyYAML + - types-python-dateutil + + # =========================================================================== + # PYLINT - Comprehensive Python linter (runs on push only for speed) + # =========================================================================== + - repo: https://github.com/pylint-dev/pylint + rev: v3.3.2 + hooks: + - id: pylint + stages: [pre-push] + args: + - --rcfile=pyproject.toml + - --fail-under=8.0 + - --jobs=0 + additional_dependencies: + - pytest + - python-chess + - requests + - pygame + exclude: ^(Bash/|\.venv/) + + # =========================================================================== + # BANDIT - Security linter (runs on push only for speed) + # =========================================================================== + - repo: https://github.com/PyCQA/bandit + rev: 1.7.10 + hooks: + - id: bandit + stages: [pre-push] + args: + - -c + - pyproject.toml + - --severity-level=high + - --confidence-level=medium + - --skip=B113 + additional_dependencies: ["bandit[toml]"] + exclude: ^(Bash/|\.venv/|tests/|.*test.*\.py$) + + # =========================================================================== + # PYTEST + COVERAGE - Run tests and enforce 100% code coverage + # Only tests for subpackages with changed files are run (see script). + # Runs on push only (slow); use --hook-stage pre-push to run manually. 
+ # =========================================================================== + - repo: local + hooks: + - id: pytest-coverage + name: pytest with coverage enforcement + entry: python scripts/pytest_changed_packages.py + language: system + types: [python] + pass_filenames: true + stages: [pre-push] + + # =========================================================================== + # VULTURE - Dead code detection (disabled - doesn't work well with pre-commit) + # =========================================================================== + # - repo: https://github.com/jendrikseipp/vulture + # rev: v2.13 + # hooks: + # - id: vulture + # args: + # - --min-confidence=80 + # - --exclude=.venv,Bash,__pycache__ + # exclude: ^(Bash/|\.venv/) + + # =========================================================================== + # PYUPGRADE - Upgrade Python syntax (disabled - incompatible with Python 3.14) + # =========================================================================== + # - repo: https://github.com/asottile/pyupgrade + # rev: v3.19.0 + # hooks: + # - id: pyupgrade + # args: + # - --py310-plus + + # =========================================================================== + # CODESPELL - Spell checking in code (expanded ignore list for non-English) + # =========================================================================== + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + args: + - --skip=*.json,*.lock,*.min.js,*.min.css,.git,__pycache__,.venv,*.txt + - --ignore-words-list=als,ans,ect,nd,som,sur,te,nam,numer,lew,sie,wil,postion,clen,ther,folow,derrive,ony,tje,noe,theses,crate,doubleclick,wile,tabel,pary,blok,bloc,proces,serwer,parametr,adres,hart,dout,metod,tekst,synonim,grup,mosty,lokal,skalar,milion,nowe,tre,hel,alph + exclude: ^(Bash/ffmpeg-build/|LaTeX/|.*\.geojson$) + + # =========================================================================== + # DOCFORMATTER - Format docstrings (disabled - 
causes recursion errors) + # =========================================================================== + # - repo: local + # hooks: + # - id: docformatter + # name: docformatter + # entry: docformatter + # language: system + # types: [python] + # args: + # - --in-place + # - --wrap-summaries=88 + # - --wrap-descriptions=88 + + # =========================================================================== + # INTERROGATE - Docstring coverage (disabled - causes recursion on large files) + # =========================================================================== + # - repo: https://github.com/econchick/interrogate + # rev: 1.7.0 + # hooks: + # - id: interrogate + # args: + # - --fail-under=0 + # - --verbose + # - --ignore-init-method + # - --ignore-init-module + # - --ignore-magic + # - --ignore-private + # - --ignore-semiprivate + # - --exclude=Bash,.venv,__pycache__ + # pass_filenames: false + + # =========================================================================== + # AUTOFLAKE - Remove unused imports/variables + # Disabled: fully redundant with ruff (F401, F841, F811) + --fix + # =========================================================================== + # - repo: https://github.com/PyCQA/autoflake + # rev: v2.3.1 + # hooks: + # - id: autoflake + # args: + # - --in-place + # - --remove-all-unused-imports + # - --remove-unused-variables + # - --remove-duplicate-keys + # - --expand-star-imports + + # =========================================================================== + # SAFETY - Check for security vulnerabilities in dependencies + # =========================================================================== + # Note: Safety requires API key for full functionality, disabled by default + # - repo: https://github.com/Lucas-C/pre-commit-hooks-safety + # rev: v1.3.2 + # hooks: + # - id: python-safety-dependencies-check + # files: requirements.*\.txt$ + + # =========================================================================== + # PYRIGHT - 
Microsoft's type checker (very strict, optional) + # =========================================================================== + # Uncomment to enable - can be slow and very strict + # - repo: https://github.com/RobertCraiworthy/pyright-action + # rev: v1.1.350 + # hooks: + # - id: pyright + + # =========================================================================== + # CHECK JSON/YAML/Markdown formatting (runs on push only — slow Node.js startup) + # =========================================================================== + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + types_or: [yaml, json, markdown] + exclude: ^(Bash/|\.venv/|.*\.lock$) + stages: [pre-push] + + # =========================================================================== + # SHELLCHECK - Shell script linting + # Wrapper batches files to avoid OOM on large repos. + # =========================================================================== + - repo: local + hooks: + - id: shellcheck + name: shellcheck + entry: bash -c 'printf "%s\0" "$@" | xargs -0 -n 40 shellcheck --severity=warning' -- + language: system + types: [shell] + + # =========================================================================== + # CHECK PYTHON LOCATION - All Python files must be under python_pkg/ + # =========================================================================== + - repo: local + hooks: + - id: check-python-location + name: check Python files are under python_pkg/ + entry: scripts/check_python_location.sh + language: script + types: [python] + + # =========================================================================== + # REMOVE EMPTY DIRECTORIES - Clean up empty folders in the repo + # =========================================================================== + - repo: local + hooks: + - id: remove-empty-dirs + name: remove empty directories + entry: find . 
-type d -empty -not -path './.git/*' -delete -print + language: system + pass_filenames: false + always_run: true + + # =========================================================================== + # SECRET PATTERNS - Block commits containing sensitive data + # =========================================================================== + - repo: local + hooks: + - id: check-no-secrets + name: check for leaked secrets + entry: scripts/check_no_secrets.sh + language: script + exclude: ^(\.secret-patterns|(meta/)?\.pre-commit-config\.yaml|.*\.geojson)$ + + # =========================================================================== + # COMMITIZEN - Conventional commits (optional) + # =========================================================================== + # - repo: https://github.com/commitizen-tools/commitizen + # rev: v3.13.0 + # hooks: + # - id: commitizen + # - id: commitizen-branch + # stages: [push] diff --git a/meta/lint_python.sh b/meta/lint_python.sh new file mode 100755 index 0000000..7de8f50 --- /dev/null +++ b/meta/lint_python.sh @@ -0,0 +1,346 @@ +#!/usr/bin/env bash +# ============================================================================== +# Python Linting Script - Run ALL linters with aggressive settings +# ============================================================================== +# Usage: +# ./lint_python.sh # Lint all Python files +# ./lint_python.sh --fix # Lint and auto-fix where possible +# ./lint_python.sh # Lint specific file +# ./lint_python.sh --quick # Quick lint (ruff + mypy only) +# ./lint_python.sh --report # Generate detailed reports +# ============================================================================== + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +MAGENTA='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color +BOLD='\033[1m' + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="${SCRIPT_DIR}" 
+PYTHON_PATHS=( + "PYTHON" + "articles" + "poker-modifier-app" + "tests" +) +EXCLUDE_PATHS=( + ".venv" + "__pycache__" + ".git" + "Bash/ffmpeg-build" + ".pytest_cache" + ".ruff_cache" + ".mypy_cache" +) + +# Build exclude pattern for find +EXCLUDE_PATTERN="" +for path in "${EXCLUDE_PATHS[@]}"; do + EXCLUDE_PATTERN="${EXCLUDE_PATTERN} -path '*/${path}/*' -prune -o" +done + +# Parse arguments +FIX_MODE=false +QUICK_MODE=false +REPORT_MODE=false +TARGET_FILES="" + +while [[ $# -gt 0 ]]; do + case $1 in + --fix|-f) + FIX_MODE=true + shift + ;; + --quick|-q) + QUICK_MODE=true + shift + ;; + --report|-r) + REPORT_MODE=true + shift + ;; + --help|-h) + echo "Usage: $0 [OPTIONS] [FILES...]" + echo "" + echo "Options:" + echo " --fix, -f Auto-fix issues where possible" + echo " --quick, -q Quick mode (ruff + mypy only)" + echo " --report, -r Generate detailed reports to ./lint-reports/" + echo " --help, -h Show this help message" + echo "" + echo "Examples:" + echo " $0 # Lint all Python files" + echo " $0 --fix # Lint and auto-fix" + echo " $0 PYTHON/ # Lint specific directory" + echo " $0 --quick --fix # Quick lint with auto-fix" + exit 0 + ;; + *) + TARGET_FILES="${TARGET_FILES} $1" + shift + ;; + esac +done + +# If no target specified, use default paths +if [[ -z "${TARGET_FILES}" ]]; then + TARGET_FILES="${PYTHON_PATHS[*]}" +fi + +# Create reports directory if needed +if [[ "${REPORT_MODE}" == true ]]; then + mkdir -p "${PROJECT_ROOT}/lint-reports" +fi + +# Track overall status +OVERALL_STATUS=0 +FAILED_TOOLS=() + +# ============================================================================== +# Helper functions +# ============================================================================== + +print_header() { + echo "" + echo -e "${BOLD}${BLUE}══════════════════════════════════════════════════════════════${NC}" + echo -e "${BOLD}${BLUE} $1${NC}" + echo -e "${BOLD}${BLUE}══════════════════════════════════════════════════════════════${NC}" +} + +print_subheader() { + 
echo "" + echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}" + echo -e "${CYAN} $1${NC}" + echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}" +} + +print_success() { + echo -e "${GREEN}✓${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}⚠${NC} $1" +} + +print_error() { + echo -e "${RED}✗${NC} $1" +} + +print_info() { + echo -e "${BLUE}ℹ${NC} $1" +} + +run_tool() { + local tool_name="$1" + local tool_cmd="$2" + local report_file="${PROJECT_ROOT}/lint-reports/${tool_name}.txt" + + print_subheader "Running ${tool_name}..." + + if [[ "${REPORT_MODE}" == true ]]; then + if eval "${tool_cmd}" 2>&1 | tee "${report_file}"; then + print_success "${tool_name} passed" + return 0 + else + print_error "${tool_name} found issues (see ${report_file})" + FAILED_TOOLS+=("${tool_name}") + return 1 + fi + else + if eval "${tool_cmd}"; then + print_success "${tool_name} passed" + return 0 + else + print_error "${tool_name} found issues" + FAILED_TOOLS+=("${tool_name}") + return 1 + fi + fi +} + +check_tool() { + if command -v "$1" &> /dev/null; then + return 0 + else + print_warning "$1 not found, skipping..." 
+ return 1 + fi +} + +# ============================================================================== +# Main linting process +# ============================================================================== + +print_header "Python Linting Suite - Aggressive Mode" +echo "" +print_info "Target: ${TARGET_FILES}" +print_info "Fix mode: ${FIX_MODE}" +print_info "Quick mode: ${QUICK_MODE}" +print_info "Report mode: ${REPORT_MODE}" + +cd "${PROJECT_ROOT}" + +# ============================================================================== +# RUFF - Primary linter and formatter +# ============================================================================== +if check_tool ruff; then + if [[ "${FIX_MODE}" == true ]]; then + run_tool "ruff-lint" "ruff check --fix --show-fixes ${TARGET_FILES}" || OVERALL_STATUS=1 + run_tool "ruff-format" "ruff format ${TARGET_FILES}" || OVERALL_STATUS=1 + else + run_tool "ruff-lint" "ruff check ${TARGET_FILES}" || OVERALL_STATUS=1 + run_tool "ruff-format-check" "ruff format --check ${TARGET_FILES}" || OVERALL_STATUS=1 + fi +fi + +# ============================================================================== +# MYPY - Static type checking +# ============================================================================== +if check_tool mypy; then + run_tool "mypy" "mypy --strict --ignore-missing-imports ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# Quick mode exits here +if [[ "${QUICK_MODE}" == true ]]; then + print_header "Quick Lint Complete" + if [[ ${#FAILED_TOOLS[@]} -gt 0 ]]; then + print_error "Failed tools: ${FAILED_TOOLS[*]}" + exit 1 + else + print_success "All quick checks passed!" 
+ exit 0 + fi +fi + +# ============================================================================== +# PYLINT - Comprehensive linting +# ============================================================================== +if check_tool pylint; then + run_tool "pylint" "pylint --rcfile=pyproject.toml --jobs=0 --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# BANDIT - Security linting +# ============================================================================== +if check_tool bandit; then + run_tool "bandit" "bandit -c pyproject.toml -r ${TARGET_FILES} --severity-level low --confidence-level low" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# VULTURE - Dead code detection +# ============================================================================== +if check_tool vulture; then + run_tool "vulture" "vulture --min-confidence 80 ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# FLAKE8 - Traditional linter +# ============================================================================== +if check_tool flake8; then + run_tool "flake8" "flake8 --max-line-length=88 --extend-ignore=E203,W503 --max-complexity=10 ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# PYCODESTYLE - PEP 8 style checker +# ============================================================================== +if check_tool pycodestyle; then + run_tool "pycodestyle" "pycodestyle --max-line-length=88 --ignore=E203,W503 ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# PYDOCSTYLE - Docstring style checker +# ============================================================================== +if check_tool pydocstyle; then + 
run_tool "pydocstyle" "pydocstyle --convention=google ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# RADON - Complexity metrics +# ============================================================================== +if check_tool radon; then + print_subheader "Running radon (complexity analysis)..." + echo "" + echo -e "${MAGENTA}Cyclomatic Complexity:${NC}" + radon cc -a -s ${TARGET_FILES} || true + echo "" + echo -e "${MAGENTA}Maintainability Index:${NC}" + radon mi -s ${TARGET_FILES} || true + + if [[ "${REPORT_MODE}" == true ]]; then + radon cc -a -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-cc.txt" 2>&1 || true + radon mi -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-mi.txt" 2>&1 || true + fi +fi + +# ============================================================================== +# INTERROGATE - Docstring coverage +# ============================================================================== +if check_tool interrogate; then + run_tool "interrogate" "interrogate -v --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# PYRIGHT - Microsoft's type checker (optional, very strict) +# ============================================================================== +if check_tool pyright; then + run_tool "pyright" "pyright ${TARGET_FILES}" || OVERALL_STATUS=1 +fi + +# ============================================================================== +# AUTOFLAKE - Unused imports/variables (fix mode only) +# ============================================================================== +if [[ "${FIX_MODE}" == true ]] && check_tool autoflake; then + print_subheader "Running autoflake (removing unused imports)..." 
+ find ${TARGET_FILES} -name "*.py" -type f -exec autoflake --in-place --remove-all-unused-imports --remove-unused-variables {} \; + print_success "autoflake completed" +fi + +# ============================================================================== +# PYUPGRADE - Upgrade Python syntax (fix mode only) +# ============================================================================== +if [[ "${FIX_MODE}" == true ]] && check_tool pyupgrade; then + print_subheader "Running pyupgrade (upgrading syntax to Python 3.10+)..." + find ${TARGET_FILES} -name "*.py" -type f -exec pyupgrade --py310-plus {} \; + print_success "pyupgrade completed" +fi + +# ============================================================================== +# CODESPELL - Spell checking +# ============================================================================== +if check_tool codespell; then + if [[ "${FIX_MODE}" == true ]]; then + run_tool "codespell" "codespell -w --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1 + else + run_tool "codespell" "codespell --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1 + fi +fi + +# ============================================================================== +# Summary +# ============================================================================== +print_header "Linting Summary" +echo "" + +if [[ ${OVERALL_STATUS} -ne 0 ]]; then + print_error "The following tools reported issues:" + for tool in "${FAILED_TOOLS[@]}"; do + echo " - ${tool}" + done + echo "" + if [[ "${REPORT_MODE}" == true ]]; then + print_info "Detailed reports saved to: ${PROJECT_ROOT}/lint-reports/" + fi + print_info "Run with --fix to auto-fix issues where possible" + exit ${OVERALL_STATUS} +else + print_success "All linting checks passed!" 
+ exit 0 +fi diff --git a/meta/pyproject.toml b/meta/pyproject.toml new file mode 100644 index 0000000..cdb3b6f --- /dev/null +++ b/meta/pyproject.toml @@ -0,0 +1,308 @@ +[project] +name = "testsandmisc" +version = "0.1.0" +description = "Collection of miscellaneous tests and scripts" +requires-python = ">=3.10" + +# ============================================================================ +# RUFF - Extremely fast Python linter and formatter (written in Rust) +# ============================================================================ +[tool.ruff] +target-version = "py310" +# Include all Python files +include = ["*.py", "**/*.py"] +# Exclude vendored/build directories +exclude = [ + ".git", + ".venv", + "__pycache__", + "build", + "dist", + ".eggs", + "Bash/ffmpeg-build", # Vendored FFmpeg tools +] + +[tool.ruff.lint] +# AGGRESSIVE: Select ALL rules from all categories +select = ["ALL"] +# Ignores for rules that are too strict for this mixed script repository +ignore = [ + # D203 vs D211 conflict - we use D211 (no blank line before class docstring) + "D203", # 1 blank line required before class docstring (conflicts with D211) + # D212 vs D213 conflict - we use D212 (summary on first line after """) + "D213", # Multi-line docstring summary should start at second line (conflicts with D212) + # Formatter conflicts - recommended to disable when using ruff format + # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "COM812", # Trailing comma missing - formatter handles this automatically + "ISC001", # Implicit string concatenation - formatter may create these when wrapping + # Security audit - prone to false positives with validated input + # https://github.com/astral-sh/ruff/issues/4045 + "S603", # subprocess call without shell - prone to false positives as it is + # difficult to determine whether the passed arguments have been validated +] + +# Allow ALL rules to be auto-fixed +fixable = ["ALL"] +unfixable = [] + +# Per-file ignores — only rules 
that FUNDAMENTALLY conflict with test code remain. +# Every other rule was fixed in source. See justifications below. +[tool.ruff.lint.per-file-ignores] +"**/tests/**/*.py" = [ + "ARG", # @patch decorators inject mock params that aren't always referenced; + # the patch side-effect is needed, not the mock object itself. + "D", # Test names like test_sub_cards_no_answer_text are self-documenting; + # docstrings would be redundant noise on every test method. + "PLC0415", # Test isolation requires importing AFTER mocking sys.modules; + # top-level imports would bypass the mocks entirely. + "PLR2004", # assert count == 5 is clearer than assert count == EXPECTED_COUNT; + # named constants for test expectations add indirection without value. + "S101", # assert IS what tests do — every Python test suite suppresses this. + "SLF001", # Unit tests must exercise private internals (_method, _attr) to reach + # 100% branch coverage; only integration tests can avoid this. +] +"**/test_*.py" = [ + "ARG", + "D", + "PLC0415", + "PLR2004", + "S101", + "SLF001", +] + + +[tool.ruff.lint.pydocstyle] +convention = "google" # Use Google docstring convention + +[tool.ruff.lint.isort] +force-single-line = false +force-sort-within-sections = true +known-first-party = ["python_pkg"] + +[tool.ruff.lint.flake8-quotes] +docstring-quotes = "double" +inline-quotes = "double" + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = true + +# ============================================================================ +# MYPY - Static type checker (most aggressive settings) +# ============================================================================ +[tool.mypy] +python_version = "3.10" +# Strict mode enables most checks +strict = true +# Additional aggressive settings +warn_return_any = true +warn_unused_configs = true 
+disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true +# Extra strict settings +disallow_any_unimported = true +disallow_any_explicit = false # Too aggressive for practical use +disallow_any_generics = true +disallow_subclassing_any = true +strict_equality = true +extra_checks = true +# Allow missing imports for third-party packages +ignore_missing_imports = true +# Show error codes +show_error_codes = true +# Enable colored output +color_output = true +# Exclude vendored directories +exclude = [ + "Bash/ffmpeg-build/", + ".venv/", + "linux_configuration/scripts/misc/testsAndMisc-bash/tools/", # Avoid duplicate module named 'tools' +] + +# ============================================================================ +# PYLINT - Comprehensive Python linter +# ============================================================================ +[tool.pylint.main] +# Analyse import fallback blocks +analyse-fallback-blocks = true +# Pickle collected data for later comparisons +persistent = true +# Jobs to use for parallel execution (0 = auto) +jobs = 0 +# Minimum Python version +py-version = "3.10" +# Ignore vendored directories +ignore = ["Bash", ".venv", "__pycache__"] +# Ignore patterns +ignore-patterns = [".*\\.pyi$"] +# Allow C extension modules to be introspected +extension-pkg-allow-list = ["cv2", "pygame", "lxml"] + +[tool.pylint.messages_control] +# Enable all checks by disabling disable +enable = "all" +# No disabled checks - maximum strictness +disable = [] + +[tool.pylint.design] +# Mixins and single-entry-point classes may have zero public methods +min-public-methods = 0 +# Test modules can be large +max-module-lines = 1000 +# UI/mixin classes accumulate attributes across multiple mixins +max-attributes = 10 + +[tool.pylint.spelling] +# No spelling dictionary to 
avoid false positives +spelling-dict = "" + +[tool.pylint.typecheck] +# cv2 (OpenCV) dynamically loads members from C extension at runtime. +# unittest.mock.MagicMock generates assertion/introspection methods at runtime. +generated-members = [ + "cv2.*", + ".*\\.assert_called_once_with", + ".*\\.assert_called_once", + ".*\\.assert_called", + ".*\\.assert_not_called", + ".*\\.assert_any_call", + ".*\\.call_args", + ".*\\.call_args_list", + ".*\\.call_count", +] + +# ============================================================================ +# BANDIT - Security linter +# ============================================================================ +[tool.bandit] +# Exclude test directories and vendored code +exclude_dirs = ["tests", ".venv", "Bash/ffmpeg-build"] + +# ============================================================================ +# BLACK & ISORT - Removed (ruff handles formatting and import sorting) +# ============================================================================ + +# ============================================================================ +# PYTEST - Testing framework configuration +# ============================================================================ +[tool.pytest.ini_options] +testpaths = ["python_pkg"] +python_files = ["test_*.py", "*_test.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = [ + "-v", + "--strict-markers", + "--strict-config", + "-ra", + "--cov=python_pkg", + "--cov-branch", + "--cov-report=term-missing", + "--cov-report=lcov", +] +filterwarnings = [ + "error", + "ignore::DeprecationWarning", + "default::pytest.PytestUnraisableExceptionWarning", +] + +# ============================================================================ +# COVERAGE - Code coverage configuration +# ============================================================================ +[tool.coverage.run] +source = ["python_pkg"] +branch = true +omit = [ + "*/__pycache__/*", + "*/tests/*", + "*/.venv/*", +] + 
+[tool.coverage.report] +# Fail under this percentage +fail_under = 100 +show_missing = true +skip_covered = false +exclude_lines = [ + # Standard exclusions + "pragma: no cover", + # Unreachable defensive code + "raise NotImplementedError", + "raise AssertionError", + # Type checking imports + "if TYPE_CHECKING:", + # Main script entry point + 'if __name__ == "__main__":', +] +# Partial branch exclusions for unreachable branches +partial_branches = [ + "pragma: no branch", +] + +# ============================================================================ +# VULTURE - Dead code detection +# ============================================================================ +# Note: Vulture uses command-line args, but we can document settings here +# vulture --min-confidence 80 --exclude ".venv,Bash" . + +# ============================================================================ +# FLAKE8 - Python linter (via Flake8-pyproject for pyproject.toml support) +# ============================================================================ +[tool.flake8] +# Maximum line length (matches ruff/black) +max-line-length = 88 +# Maximum McCabe complexity (matches ruff C901 threshold) +max-complexity = 10 +# Maximum cognitive complexity (flake8-cognitive-complexity) +max-cognitive-complexity = 12 +# Maximum function length (flake8-functions) +max-function-length = 20 +# Maximum returns/arguments per function +max-returns-amount = 6 +max-arguments = 5 +# Docstring convention (matches ruff) +docstring-convention = "google" +# Select all error codes +select = ["E", "F", "W", "C", "B", "B950"] +# Extend with plugin codes +extend-select = ["B", "B9", "C4", "SIM", "PT", "TC", "ANN"] +# Ignore rules that conflict with ruff-format or are duplicated +extend-ignore = [ + "E501", # Line too long - B950 from bugbear is smarter (allows 10% overflow) + "W503", # Line break before binary operator - contradicts PEP 8 update + "ANN101", # Missing type annotation for self + "ANN102", # Missing type 
annotation for cls +] +# Exclude directories +exclude = [ + ".git", + ".venv", + "__pycache__", + "build", + "dist", + ".eggs", + "Bash/ffmpeg-build", +] +# Per-file ignores +per-file-ignores = [ + "**/tests/**/*.py:S101,ANN", + "**/test_*.py:S101,ANN", +] + +# ============================================================================ +# PYDOCSTYLE - Docstring style checker (ruff handles this, but for standalone) +# ============================================================================ +# Configured in ruff.lint.pydocstyle above diff --git a/meta/requirements.txt b/meta/requirements.txt new file mode 100644 index 0000000..81169af --- /dev/null +++ b/meta/requirements.txt @@ -0,0 +1,79 @@ +# ============================================================================== +# testsAndMisc — combined runtime + development dependencies +# Install with: pip install -r meta/requirements.txt +# Sorted alphabetically (enforced by pre-commit `requirements-txt-fixer`). +# ============================================================================== + +add-trailing-comma>=3.1.0 +aiohttp>=3.9 +autoflake>=2.2.0 +autopep8>=2.0.0 +bandit>=1.7.0 +beautifulsoup4>=4.0 +berserk>=0.13 +black>=24.0.0 +bottle>=0.12 +codespell>=2.2.0 +coverage>=7.4.0 +darglint>=1.8.0 +dead>=1.5.0 +docformatter>=1.7.0 +fixit>=2.1.0 +flake8>=7.0.0 +flake8-annotations>=3.0.0 +flake8-bandit>=4.1.0 +flake8-bugbear>=24.0.0 +flake8-comprehensions>=3.14.0 +flake8-docstrings>=1.7.0 +flake8-eradicate>=1.5.0 +flake8-pie>=0.16.0 +flake8-print>=5.0.0 +flake8-pyi>=24.0.0 +flake8-pytest-style>=2.0.0 +flake8-return>=1.2.0 +flake8-simplify>=0.21.0 +genanki>=0.13 +geopandas>=1.0 +howlongtobeatpy>=1.0 +hypothesis>=6.98.0 +importlib-metadata>=7.0.0 +interrogate>=1.5.0 +isort>=5.13.0 +lxml>=5.0 +matplotlib>=3.0 +mccabe>=0.7.0 +mitmproxy>=10.0 +mypy>=1.8.0 +numpy>=1.20 +opencv-python>=4.0 +pillow>=10.0 +pip-audit>=2.6.0 +pipdeptree>=2.14.0 +pre-commit>=3.6.0 +prospector>=1.10.0 +pycodestyle>=2.11.0 +pydocstyle>=6.3.0 
+pyflakes>=3.2.0 +pygame>=2.0 +pylama>=8.4.0 +pylint>=3.0.0 +pyright>=1.1.350 +pytest>=8.0.0 +pytest-cov>=4.1.0 +pytest-randomly>=3.15.0 +pytest-sugar>=1.0.0 +pytest-timeout>=2.2.0 +pytest-xdist>=3.5.0 +python-chess>=1.999 +pyupgrade>=3.15.0 +radon>=6.0.0 +reorder-python-imports>=3.12.0 +requests>=2.0 +ruff>=0.8.0 +safety>=2.3.0 +selenium>=4.0 +types-python-dateutil>=2.8.0 +types-PyYAML>=6.0.0 +types-requests>=2.31.0 +types-setuptools>=69.0.0 +websockets>=13.0 diff --git a/meta/run.sh b/meta/run.sh new file mode 100755 index 0000000..f445898 --- /dev/null +++ b/meta/run.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# Easy entrypoint for system usage reports and polling script diagnostics. +# Usage: +# ./run.sh # today's report to stdout +# ./run.sh --date 20260501 # specific day +# ./run.sh --top 25 # override row count +# ./run.sh --profile [duration] # profile polling scripts (default 60s) +# ./run.sh --diagnose # find inefficient shell scripts +# ./run.sh --init-artifacts ... # bootstrap contract/evidence/session artifacts +# +# Any other args are forwarded to usage_report.py unchanged. + +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPORT_SCRIPT="$SCRIPT_DIR/linux_configuration/scripts/system-maintenance/bin/usage_report.py" +ARTIFACT_INIT_SCRIPT="$SCRIPT_DIR/scripts/init_agent_artifacts.sh" + +if [[ ! -f "$REPORT_SCRIPT" ]]; then + echo "Error: usage_report.py not found at: $REPORT_SCRIPT" >&2 + exit 1 +fi + +if [[ ! -f "$ARTIFACT_INIT_SCRIPT" ]]; then + echo "Error: init_agent_artifacts.sh not found at: $ARTIFACT_INIT_SCRIPT" >&2 + exit 1 +fi + +# Profiling mode: trace fork-heavy scripts over time +profile_polling_scripts() { + local duration="${1:-60}" + echo "=== Polling Script Profiler (${duration}s) ===" >&2 + echo "Tracing fork/exec calls in shell scripts..." 
>&2 + echo "" >&2 + + # Temporary file that receives the strace summary + local trace_file="/tmp/polling_trace_$$.txt" + + # Attach strace to this shell (PID $$) and count clone/execve calls + ( + timeout "$duration" strace -f -e trace=clone,execve -c -p $$ 2>&1 || true + ) > "$trace_file" 2>&1 + + echo "Trace completed. Analyzing results:" >&2 + echo "" >&2 + + # Show the clone/execve rows of the strace summary + if ! grep -e "execve" -e "clone" "$trace_file" | head -20; then + : + fi + + rm -f "$trace_file" +} + +# Diagnostic mode: find inefficient patterns in shell scripts +diagnose_polling_scripts() { + echo "=== Shell Script Efficiency Audit ===" >&2 + echo "" >&2 + + local issues_found=0 + + # Check for common anti-patterns + echo "Checking for anti-patterns in shell scripts..." >&2 + echo "" >&2 + + # Pattern 1: while true with sleep (no event-driven check) + echo "1. Polling loops (while true + sleep):" >&2 + set +e + grep -r "while true\|while :" --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ + | grep -v "Binary" | grep -v ".git" | head -5 + set -e + issues_found=$((issues_found + 1)) + echo "" >&2 + + # Pattern 2: $(date +...) calls in loops (fork-heavy) + echo "2. Excessive date calls (each forks a process):" >&2 + set +e + grep -r '\$(date' --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ + | grep -v "Binary" | grep -v ".git" | head -5 + set -e + issues_found=$((issues_found + 1)) + echo "" >&2 + + # Pattern 3: pgrep/xdotool in loops + echo "3. Process inspection in loops (pgrep, xdotool):" >&2 + set +e + grep -r "while.*pgrep\|while.*xdotool\|pgrep.*while" --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ + | grep -v "Binary" | grep -v ".git" | head -5 + set -e + issues_found=$((issues_found + 1)) + echo "" >&2 + + # Pattern 4: pipes in hot paths + echo "4. 
Heavy pipes in polling scripts (| awk, | grep, | tr):" >&2 + set +e + while_true_file_list="$(mktemp)" + heavy_pipe_matches="$(mktemp)" + grep -r "while true" --include="*.sh" "$SCRIPT_DIR" > "$while_true_file_list" 2>/dev/null + if [ -s "$while_true_file_list" ]; then + xargs grep -l -e " | awk" -e " | grep" -e " | tr" < "$while_true_file_list" > "$heavy_pipe_matches" 2>/dev/null + head -5 "$heavy_pipe_matches" + fi + rm -f "$while_true_file_list" "$heavy_pipe_matches" + set -e + issues_found=$((issues_found + 1)) + echo "" >&2 + + # Pattern 5: sleep with very short intervals + echo "5. Aggressive polling (sleep < 1s):" >&2 + set +e + grep -rE "sleep 0\.[0-9]|sleep 0[^0-9]" --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ + | grep -v "Binary" | grep -v ".git" | head -5 + set -e + issues_found=$((issues_found + 1)) + echo "" >&2 + + echo "=== Recommendations ===" >&2 + echo "1. Replace 'while true + sleep' with event-driven I/O (inotifywait, read -t, etc.)" >&2 + echo "2. Use /proc and /sys instead of forking date, sensors, acpi, etc." >&2 + echo "3. Cache frequently accessed values (e.g., in /tmp state files)" >&2 + echo "4. Use bash builtins: printf %()T instead of date, \${var//} instead of tr, etc." >&2 + echo "5. Use i3blocks interval=persist + event loop instead of polling mode" >&2 + echo "6. 
Increase polling intervals: 1s → 5s → 10s where acceptable" >&2 +} + +# Handle special modes +case "${1:-}" in + --profile) + profile_polling_scripts "${2:-60}" + exit 0 + ;; + --diagnose) + diagnose_polling_scripts + exit 0 + ;; + --init-artifacts) + shift + exec "$ARTIFACT_INIT_SCRIPT" "$@" + ;; + --help) + grep '^# Usage:' "$0" | sed 's/^# //' | head -1 + grep '^# ' "$0" | sed 's/^# / /' + exit 0 + ;; +esac + +# Default: run usage_report.py with all remaining args +exec python3 "$REPORT_SCRIPT" "$@" diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index cdb3b6f..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,308 +0,0 @@ -[project] -name = "testsandmisc" -version = "0.1.0" -description = "Collection of miscellaneous tests and scripts" -requires-python = ">=3.10" - -# ============================================================================ -# RUFF - Extremely fast Python linter and formatter (written in Rust) -# ============================================================================ -[tool.ruff] -target-version = "py310" -# Include all Python files -include = ["*.py", "**/*.py"] -# Exclude vendored/build directories -exclude = [ - ".git", - ".venv", - "__pycache__", - "build", - "dist", - ".eggs", - "Bash/ffmpeg-build", # Vendored FFmpeg tools -] - -[tool.ruff.lint] -# AGGRESSIVE: Select ALL rules from all categories -select = ["ALL"] -# Ignores for rules that are too strict for this mixed script repository -ignore = [ - # D203 vs D211 conflict - we use D211 (no blank line before class docstring) - "D203", # 1 blank line required before class docstring (conflicts with D211) - # D212 vs D213 conflict - we use D212 (summary on first line after """) - "D213", # Multi-line docstring summary should start at second line (conflicts with D212) - # Formatter conflicts - recommended to disable when using ruff format - # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules - "COM812", # Trailing comma missing - formatter handles this 
automatically - "ISC001", # Implicit string concatenation - formatter may create these when wrapping - # Security audit - prone to false positives with validated input - # https://github.com/astral-sh/ruff/issues/4045 - "S603", # subprocess call without shell - prone to false positives as it is - # difficult to determine whether the passed arguments have been validated -] - -# Allow ALL rules to be auto-fixed -fixable = ["ALL"] -unfixable = [] - -# Per-file ignores — only rules that FUNDAMENTALLY conflict with test code remain. -# Every other rule was fixed in source. See justifications below. -[tool.ruff.lint.per-file-ignores] -"**/tests/**/*.py" = [ - "ARG", # @patch decorators inject mock params that aren't always referenced; - # the patch side-effect is needed, not the mock object itself. - "D", # Test names like test_sub_cards_no_answer_text are self-documenting; - # docstrings would be redundant noise on every test method. - "PLC0415", # Test isolation requires importing AFTER mocking sys.modules; - # top-level imports would bypass the mocks entirely. - "PLR2004", # assert count == 5 is clearer than assert count == EXPECTED_COUNT; - # named constants for test expectations add indirection without value. - "S101", # assert IS what tests do — every Python test suite suppresses this. - "SLF001", # Unit tests must exercise private internals (_method, _attr) to reach - # 100% branch coverage; only integration tests can avoid this. 
-] -"**/test_*.py" = [ - "ARG", - "D", - "PLC0415", - "PLR2004", - "S101", - "SLF001", -] - - -[tool.ruff.lint.pydocstyle] -convention = "google" # Use Google docstring convention - -[tool.ruff.lint.isort] -force-single-line = false -force-sort-within-sections = true -known-first-party = ["python_pkg"] - -[tool.ruff.lint.flake8-quotes] -docstring-quotes = "double" -inline-quotes = "double" - -[tool.ruff.lint.flake8-tidy-imports] -ban-relative-imports = "all" - -[tool.ruff.format] -quote-style = "double" -indent-style = "space" -skip-magic-trailing-comma = false -line-ending = "auto" -docstring-code-format = true - -# ============================================================================ -# MYPY - Static type checker (most aggressive settings) -# ============================================================================ -[tool.mypy] -python_version = "3.10" -# Strict mode enables most checks -strict = true -# Additional aggressive settings -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -disallow_incomplete_defs = true -check_untyped_defs = true -disallow_untyped_decorators = true -no_implicit_optional = true -warn_redundant_casts = true -warn_unused_ignores = true -warn_no_return = true -warn_unreachable = true -# Extra strict settings -disallow_any_unimported = true -disallow_any_explicit = false # Too aggressive for practical use -disallow_any_generics = true -disallow_subclassing_any = true -strict_equality = true -extra_checks = true -# Allow missing imports for third-party packages -ignore_missing_imports = true -# Show error codes -show_error_codes = true -# Enable colored output -color_output = true -# Exclude vendored directories -exclude = [ - "Bash/ffmpeg-build/", - ".venv/", - "linux_configuration/scripts/misc/testsAndMisc-bash/tools/", # Avoid duplicate module named 'tools' -] - -# ============================================================================ -# PYLINT - Comprehensive Python linter -# 
============================================================================ -[tool.pylint.main] -# Analyse import fallback blocks -analyse-fallback-blocks = true -# Pickle collected data for later comparisons -persistent = true -# Jobs to use for parallel execution (0 = auto) -jobs = 0 -# Minimum Python version -py-version = "3.10" -# Ignore vendored directories -ignore = ["Bash", ".venv", "__pycache__"] -# Ignore patterns -ignore-patterns = [".*\\.pyi$"] -# Allow C extension modules to be introspected -extension-pkg-allow-list = ["cv2", "pygame", "lxml"] - -[tool.pylint.messages_control] -# Enable all checks by disabling disable -enable = "all" -# No disabled checks - maximum strictness -disable = [] - -[tool.pylint.design] -# Mixins and single-entry-point classes may have zero public methods -min-public-methods = 0 -# Test modules can be large -max-module-lines = 1000 -# UI/mixin classes accumulate attributes across multiple mixins -max-attributes = 10 - -[tool.pylint.spelling] -# No spelling dictionary to avoid false positives -spelling-dict = "" - -[tool.pylint.typecheck] -# cv2 (OpenCV) dynamically loads members from C extension at runtime. -# unittest.mock.MagicMock generates assertion/introspection methods at runtime. 
-generated-members = [ - "cv2.*", - ".*\\.assert_called_once_with", - ".*\\.assert_called_once", - ".*\\.assert_called", - ".*\\.assert_not_called", - ".*\\.assert_any_call", - ".*\\.call_args", - ".*\\.call_args_list", - ".*\\.call_count", -] - -# ============================================================================ -# BANDIT - Security linter -# ============================================================================ -[tool.bandit] -# Exclude test directories and vendored code -exclude_dirs = ["tests", ".venv", "Bash/ffmpeg-build"] - -# ============================================================================ -# BLACK & ISORT - Removed (ruff handles formatting and import sorting) -# ============================================================================ - -# ============================================================================ -# PYTEST - Testing framework configuration -# ============================================================================ -[tool.pytest.ini_options] -testpaths = ["python_pkg"] -python_files = ["test_*.py", "*_test.py"] -python_classes = ["Test*"] -python_functions = ["test_*"] -addopts = [ - "-v", - "--strict-markers", - "--strict-config", - "-ra", - "--cov=python_pkg", - "--cov-branch", - "--cov-report=term-missing", - "--cov-report=lcov", -] -filterwarnings = [ - "error", - "ignore::DeprecationWarning", - "default::pytest.PytestUnraisableExceptionWarning", -] - -# ============================================================================ -# COVERAGE - Code coverage configuration -# ============================================================================ -[tool.coverage.run] -source = ["python_pkg"] -branch = true -omit = [ - "*/__pycache__/*", - "*/tests/*", - "*/.venv/*", -] - -[tool.coverage.report] -# Fail under this percentage -fail_under = 100 -show_missing = true -skip_covered = false -exclude_lines = [ - # Standard exclusions - "pragma: no cover", - # Unreachable defensive code - "raise 
NotImplementedError", - "raise AssertionError", - # Type checking imports - "if TYPE_CHECKING:", - # Main script entry point - 'if __name__ == "__main__":', -] -# Partial branch exclusions for unreachable branches -partial_branches = [ - "pragma: no branch", -] - -# ============================================================================ -# VULTURE - Dead code detection -# ============================================================================ -# Note: Vulture uses command-line args, but we can document settings here -# vulture --min-confidence 80 --exclude ".venv,Bash" . - -# ============================================================================ -# FLAKE8 - Python linter (via Flake8-pyproject for pyproject.toml support) -# ============================================================================ -[tool.flake8] -# Maximum line length (matches ruff/black) -max-line-length = 88 -# Maximum McCabe complexity (matches ruff C901 threshold) -max-complexity = 10 -# Maximum cognitive complexity (flake8-cognitive-complexity) -max-cognitive-complexity = 12 -# Maximum function length (flake8-functions) -max-function-length = 20 -# Maximum returns/arguments per function -max-returns-amount = 6 -max-arguments = 5 -# Docstring convention (matches ruff) -docstring-convention = "google" -# Select all error codes -select = ["E", "F", "W", "C", "B", "B950"] -# Extend with plugin codes -extend-select = ["B", "B9", "C4", "SIM", "PT", "TC", "ANN"] -# Ignore rules that conflict with ruff-format or are duplicated -extend-ignore = [ - "E501", # Line too long - B950 from bugbear is smarter (allows 10% overflow) - "W503", # Line break before binary operator - contradicts PEP 8 update - "ANN101", # Missing type annotation for self - "ANN102", # Missing type annotation for cls -] -# Exclude directories -exclude = [ - ".git", - ".venv", - "__pycache__", - "build", - "dist", - ".eggs", - "Bash/ffmpeg-build", -] -# Per-file ignores -per-file-ignores = [ - 
"**/tests/**/*.py:S101,ANN", - "**/test_*.py:S101,ANN", -] - -# ============================================================================ -# PYDOCSTYLE - Docstring style checker (ruff handles this, but for standalone) -# ============================================================================ -# Configured in ruff.lint.pydocstyle above diff --git a/pyproject.toml b/pyproject.toml new file mode 120000 index 0000000..7a7a90a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1 @@ +meta/pyproject.toml \ No newline at end of file diff --git a/python_pkg/geo_data/__init__.py b/python_pkg/geo_data/__init__.py deleted file mode 100644 index 1eb5745..0000000 --- a/python_pkg/geo_data/__init__.py +++ /dev/null @@ -1,206 +0,0 @@ -"""Shared geographic data module for Warsaw and Poland Anki generators. - -This module handles downloading and caching geographic data from various sources: -- OpenStreetMap via Overpass API -- Geofabrik OSM extracts -- GitHub repositories with pre-processed GeoJSON - -All data is cached locally to avoid repeated downloads. 
-""" - -from __future__ import annotations - -import shutil -import sys - -from python_pkg.geo_data._common import ( - CACHE_DIR, - MAX_RETRIES, - MIN_LAKE_AREA_KM2, - MIN_LINE_COORDS, - MIN_PEAK_ELEVATION, - MIN_RING_COORDS, - MIN_RIVER_LENGTH_KM, - OVERPASS_ENDPOINTS, - POLSKA_GEOJSON_BASE, - REQUEST_TIMEOUT, - RETRY_DELAY, - WIKIDATA_SPARQL, -) -from python_pkg.geo_data._poland_admin import ( - get_poland_boundary, - get_polish_gminy, - get_polish_powiaty, - get_polish_wojewodztwa, -) -from python_pkg.geo_data._poland_nature import ( - get_polish_forests, - get_polish_landscape_parks, - get_polish_mountain_peaks, - get_polish_mountain_ranges, - get_polish_national_parks, - get_polish_nature_reserves, -) -from python_pkg.geo_data._poland_water import ( - get_polish_coastal_features, - get_polish_islands, - get_polish_lakes, - get_polish_rivers, - get_polish_unesco_sites, -) -from python_pkg.geo_data._warsaw import ( - get_vistula_river, - get_warsaw_boundary, - get_warsaw_bridges, - get_warsaw_districts, - get_warsaw_metro_stations, - get_warsaw_osiedla, -) -from python_pkg.geo_data._warsaw_places import get_warsaw_landmarks, get_warsaw_streets - -__all__ = [ - "CACHE_DIR", - "MAX_RETRIES", - "MIN_LAKE_AREA_KM2", - "MIN_LINE_COORDS", - "MIN_PEAK_ELEVATION", - "MIN_RING_COORDS", - "MIN_RIVER_LENGTH_KM", - "OVERPASS_ENDPOINTS", - "POLSKA_GEOJSON_BASE", - "REQUEST_TIMEOUT", - "RETRY_DELAY", - "WIKIDATA_SPARQL", - "clear_cache", - "download_all_poland_data", - "download_all_warsaw_data", - "get_poland_boundary", - "get_polish_coastal_features", - "get_polish_forests", - "get_polish_gminy", - "get_polish_islands", - "get_polish_lakes", - "get_polish_landscape_parks", - "get_polish_mountain_peaks", - "get_polish_mountain_ranges", - "get_polish_national_parks", - "get_polish_nature_reserves", - "get_polish_powiaty", - "get_polish_rivers", - "get_polish_unesco_sites", - "get_polish_wojewodztwa", - "get_vistula_river", - "get_warsaw_boundary", - "get_warsaw_bridges", - 
"get_warsaw_districts", - "get_warsaw_landmarks", - "get_warsaw_metro_stations", - "get_warsaw_osiedla", - "get_warsaw_streets", -] - - -def download_all_warsaw_data() -> None: - """Download and cache all Warsaw geographic data. - - Call this once to pre-populate the cache. - """ - sys.stdout.write("Downloading all Warsaw geographic data...\n") - sys.stdout.write("=" * 60 + "\n") - - sys.stdout.write("\n1. Warsaw boundary...\n") - get_warsaw_boundary() - - sys.stdout.write("\n2. Vistula river...\n") - get_vistula_river() - - sys.stdout.write("\n3. Warsaw bridges...\n") - get_warsaw_bridges() - - sys.stdout.write("\n4. Metro stations...\n") - get_warsaw_metro_stations() - - sys.stdout.write("\n5. Major streets...\n") - get_warsaw_streets() - - sys.stdout.write("\n6. Landmarks...\n") - get_warsaw_landmarks() - - sys.stdout.write("\n7. Osiedla...\n") - get_warsaw_osiedla() - - sys.stdout.write("\n" + "=" * 60 + "\n") - sys.stdout.write("All Warsaw data cached successfully!\n") - - -def download_all_poland_data() -> None: - """Download and cache all Poland geographic data. - - Call this once to pre-populate the cache. - """ - sys.stdout.write("Downloading all Poland geographic data...\n") - sys.stdout.write("=" * 60 + "\n") - - sys.stdout.write("\n1. Województwa...\n") - get_polish_wojewodztwa() - - sys.stdout.write("\n2. Powiaty...\n") - get_polish_powiaty() - - sys.stdout.write("\n3. Gminy (this may take a while)...\n") - get_polish_gminy() - - sys.stdout.write("\n4. 
Poland boundary...\n") - get_poland_boundary() - - sys.stdout.write("\n" + "=" * 60 + "\n") - sys.stdout.write("All Poland data cached successfully!\n") - - -def clear_cache() -> None: - """Clear all cached data.""" - if CACHE_DIR.exists(): - shutil.rmtree(CACHE_DIR) - sys.stdout.write("Cache cleared.\n") - else: - sys.stdout.write("Cache directory does not exist.\n") - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Manage geographic data cache") - parser.add_argument( - "--download-warsaw", - action="store_true", - help="Download all Warsaw data", - ) - parser.add_argument( - "--download-poland", - action="store_true", - help="Download all Poland data", - ) - parser.add_argument( - "--download-all", - action="store_true", - help="Download all data", - ) - parser.add_argument( - "--clear-cache", - action="store_true", - help="Clear cached data", - ) - - args = parser.parse_args() - - if args.clear_cache: - clear_cache() - elif args.download_warsaw: - download_all_warsaw_data() - elif args.download_poland: - download_all_poland_data() - elif args.download_all: - download_all_warsaw_data() - download_all_poland_data() - else: - parser.print_help() diff --git a/python_pkg/geo_data/_common.py b/python_pkg/geo_data/_common.py deleted file mode 100644 index 38e8e27..0000000 --- a/python_pkg/geo_data/_common.py +++ /dev/null @@ -1,317 +0,0 @@ -"""Common utilities for geographic data operations. - -Shared constants, API helpers, and geometry extraction functions used -across the geo_data package. -""" - -from __future__ import annotations - -import json -from pathlib import Path -import sys -import time -from typing import TYPE_CHECKING - -import geopandas as gpd -import requests -from shapely.geometry import ( - GeometryCollection, - MultiPolygon, - Polygon, -) - -if TYPE_CHECKING: - from typing import Any - -# Parent directory of the geo_data package (i.e. 
python_pkg/) -_PKG_DIR = Path(__file__).resolve().parent.parent - -# Shared cache directory for all geo data -CACHE_DIR = _PKG_DIR / "geo_cache" - -# Overpass API endpoints (multiple for redundancy) -# Note: kumi.systems is more reliable, so it's first -OVERPASS_ENDPOINTS = [ - "https://overpass.kumi.systems/api/interpreter", - "https://overpass-api.de/api/interpreter", - "https://maps.mail.ru/osm/tools/overpass/api/interpreter", -] - -# GitHub URLs for pre-processed data -POLSKA_GEOJSON_BASE = "https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master" - -# Wikidata SPARQL endpoint -WIKIDATA_SPARQL = "https://query.wikidata.org/sparql" - -# Request timeout and retry settings -REQUEST_TIMEOUT = 180 -MAX_RETRIES = 3 -RETRY_DELAY = 5 - -# Data thresholds for filtering -MIN_PEAK_ELEVATION = 300 # meters -MIN_LAKE_AREA_KM2 = 0.5 # km² -MIN_RIVER_LENGTH_KM = 10 # km -MIN_LINE_COORDS = 2 # minimum coordinates for a line -MIN_RING_COORDS = 4 # minimum coordinates for a polygon ring - - -def _ensure_cache_dir() -> None: - """Create cache directory if it doesn't exist.""" - CACHE_DIR.mkdir(parents=True, exist_ok=True) - - -def _extract_polygonal_geometry( - geom: Polygon | MultiPolygon | GeometryCollection, -) -> Polygon | MultiPolygon | None: - """Extract only polygonal geometry from a geometry that may be mixed. - - Some OSM data comes as GeometryCollections containing polygons mixed with - lines. This function extracts only the polygon/multipolygon parts. - - Args: - geom: Input geometry (Polygon, MultiPolygon, or GeometryCollection). - - Returns: - Polygon or MultiPolygon with only the polygonal parts, or None if empty. 
- """ - if isinstance(geom, Polygon | MultiPolygon): - return geom - - if isinstance(geom, GeometryCollection): - polygons = [g for g in geom.geoms if isinstance(g, Polygon | MultiPolygon)] - if not polygons: - return None - if len(polygons) == 1: - return polygons[0] - # Flatten MultiPolygons and combine all polygons - all_polys = [] - for p in polygons: - if isinstance(p, Polygon): - all_polys.append(p) - elif isinstance(p, MultiPolygon): # pragma: no branch - all_polys.extend(p.geoms) - return MultiPolygon(all_polys) - - return None - - -def _try_single_request( - endpoint: str, query: str -) -> tuple[dict[str, Any] | None, Exception | None]: - """Try a single request to an endpoint. - - Args: - endpoint: Overpass API endpoint URL. - query: Overpass QL query string. - - Returns: - Tuple of (result, error). One will be None. - """ - try: - sys.stdout.write(f" Querying {endpoint}...\n") - response = requests.post( - endpoint, - data={"data": query}, - timeout=REQUEST_TIMEOUT, - ) - response.raise_for_status() - result = response.json() - except (requests.RequestException, requests.Timeout, ValueError) as e: - return None, e - else: - # Check for valid response with elements - if not isinstance(result, dict) or "elements" not in result: - return None, ValueError("Invalid response format") - return result, None - - -def _overpass_query(query: str) -> dict[str, Any]: - """Execute an Overpass API query with retry logic. - - Args: - query: Overpass QL query string. - - Returns: - JSON response from the API. - - Raises: - RuntimeError: If all endpoints fail. - """ - last_error: Exception | None = None - - for endpoint in OVERPASS_ENDPOINTS: - for attempt in range(MAX_RETRIES): - result, error = _try_single_request(endpoint, query) - if result is not None: - return result - last_error = error - sys.stdout.write(f" Attempt {attempt + 1} failed: {error}\n") - if attempt < MAX_RETRIES - 1: - time.sleep(RETRY_DELAY) - - msg = f"All Overpass API endpoints failed. 
Last error: {last_error}" - raise RuntimeError(msg) - - -def _download_github_geojson(url: str, cache_path: Path) -> gpd.GeoDataFrame: - """Download GeoJSON from GitHub and cache it. - - Args: - url: URL to download from. - cache_path: Path to cache the data. - - Returns: - GeoDataFrame with the data. - """ - if cache_path.exists(): - return gpd.read_file(cache_path) - - sys.stdout.write(f"Downloading from {url}...\n") - if not url.startswith(("http://", "https://")): - msg = f"Unsupported URL scheme: {url}" - raise ValueError(msg) - response = requests.get(url, timeout=REQUEST_TIMEOUT) - data = response.json() - - _ensure_cache_dir() - cache_path.write_text(json.dumps(data)) - - return gpd.GeoDataFrame.from_features(data["features"], crs="EPSG:4326") - - -def _extract_osiedla_rings( - element: dict[str, Any], min_coords: int -) -> tuple[list[list[tuple[float, float]]], list[list[tuple[float, float]]]]: - """Extract outer and inner rings from an OSM relation. - - Args: - element: OSM relation element. - min_coords: Minimum number of coordinates for a valid ring. - - Returns: - Tuple of (outer_rings, inner_rings). - """ - outer_rings: list[list[tuple[float, float]]] = [] - inner_rings: list[list[tuple[float, float]]] = [] - - for member in element.get("members", []): - if "geometry" not in member: - continue - ring = [(p["lon"], p["lat"]) for p in member["geometry"]] - if len(ring) < min_coords: - continue - # Close the ring if not closed - if ring[0] != ring[-1]: - ring.append(ring[0]) - if member.get("role") == "outer": - outer_rings.append(ring) - elif member.get("role") == "inner": - inner_rings.append(ring) - - return outer_rings, inner_rings - - -def _build_osiedla_geometry( - outer_rings: list[list[tuple[float, float]]], - inner_rings: list[list[tuple[float, float]]], -) -> dict[str, Any]: - """Build GeoJSON geometry from outer and inner rings. - - Args: - outer_rings: List of outer ring coordinates. - inner_rings: List of inner ring coordinates. 
- - Returns: - GeoJSON geometry dict. - """ - if len(outer_rings) == 1: - return { - "type": "Polygon", - "coordinates": [outer_rings[0], *inner_rings], - } - # Multiple outer rings - create MultiPolygon - # Each polygon in a MultiPolygon is [exterior, hole1, hole2, ...] - return { - "type": "MultiPolygon", - "coordinates": [[ring] for ring in outer_rings], - } - - -def _extract_polygon_from_element( - element: dict[str, Any], -) -> dict[str, Any] | None: - """Extract polygon geometry from an OSM relation or way element. - - Args: - element: OSM element (relation or way). - - Returns: - GeoJSON geometry dict, or None if extraction fails. - """ - if element.get("type") == "relation": - outer_rings, inner_rings = _extract_osiedla_rings(element, MIN_RING_COORDS) - if not outer_rings: - return None - return _build_osiedla_geometry(outer_rings, inner_rings) - - if element.get("type") == "way" and "geometry" in element: - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) < MIN_RING_COORDS: - return None - if coords[0] != coords[-1]: - coords.append(coords[0]) - return {"type": "Polygon", "coordinates": [coords]} - - return None - - -def _extract_line_from_way(element: dict[str, Any]) -> dict[str, Any] | None: - """Extract line geometry from an OSM way element. - - Args: - element: OSM way element. - - Returns: - GeoJSON LineString geometry dict, or None if extraction fails. - """ - if element.get("type") != "way" or "geometry" not in element: - return None - - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) < MIN_LINE_COORDS: - return None - - return {"type": "LineString", "coordinates": coords} - - -def _add_area_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """Add area_km2 column to a GeoDataFrame. - - Args: - gdf: GeoDataFrame with polygon geometries. - - Returns: - GeoDataFrame with area_km2 column added. 
- """ - if len(gdf) == 0: - return gdf - gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system - gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 - return gdf - - -def _add_length_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """Add length_km column to a GeoDataFrame. - - Args: - gdf: GeoDataFrame with line geometries. - - Returns: - GeoDataFrame with length_km column added. - """ - if len(gdf) == 0: - return gdf - gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system - gdf["length_km"] = gdf_proj.geometry.length / 1000 - return gdf diff --git a/python_pkg/geo_data/_poland_admin.py b/python_pkg/geo_data/_poland_admin.py deleted file mode 100644 index f6e5550..0000000 --- a/python_pkg/geo_data/_poland_admin.py +++ /dev/null @@ -1,225 +0,0 @@ -"""Polish administrative boundary data. - -Functions for downloading and caching Polish administrative divisions: -województwa, powiaty, gminy, and the national boundary. -Includes Wikidata integration for population data. -""" - -from __future__ import annotations - -import contextlib -import json -import sys -from typing import TYPE_CHECKING - -import geopandas as gpd -import requests - -from python_pkg.geo_data._common import ( - CACHE_DIR, - POLSKA_GEOJSON_BASE, - WIKIDATA_SPARQL, - _add_area_column, - _build_osiedla_geometry, - _download_github_geojson, - _ensure_cache_dir, - _extract_osiedla_rings, - _overpass_query, -) - -if TYPE_CHECKING: - from typing import Any - - -def _query_wikidata(query: str) -> list[dict[str, Any]]: - """Query Wikidata SPARQL endpoint. - - Args: - query: SPARQL query string. - - Returns: - List of result bindings. - """ - response = requests.get( - WIKIDATA_SPARQL, - params={"query": query, "format": "json"}, - timeout=60, - ) - response.raise_for_status() - return response.json()["results"]["bindings"] - - -def _get_powiaty_population() -> dict[str, int]: - """Get population data for all Polish powiaty from Wikidata. 
- - Returns: - Dictionary mapping powiat name to population. - """ - cache_path = CACHE_DIR / "powiaty_population.json" - - if cache_path.exists(): - return json.loads(cache_path.read_text()) - - # Query Wikidata for all powiaty (Q247073) in Poland (Q36) with population - # Filter to only current Polish powiaty using country=Poland filter - query = """ - SELECT ?powiat ?powiatLabel ?population WHERE { - ?powiat wdt:P31 wd:Q247073. - ?powiat wdt:P17 wd:Q36. - ?powiat wdt:P1082 ?population. - SERVICE wikibase:label { bd:serviceParam wikibase:language "pl,en". } - } - ORDER BY DESC(?population) - """ - - sys.stdout.write("Fetching powiaty population data from Wikidata...\n") - results = _query_wikidata(query) - - population_map: dict[str, int] = {} - for item in results: - label = item.get("powiatLabel", {}).get("value", "") - pop = item.get("population", {}).get("value", "0") - if label and pop: - # Remove "powiat" prefix if present for matching - clean_label = label.replace("powiat ", "").strip() - with contextlib.suppress(ValueError): - population_map[clean_label] = int(pop) - - _ensure_cache_dir() - cache_path.write_text(json.dumps(population_map, ensure_ascii=False, indent=2)) - - sys.stdout.write(f"Cached population data for {len(population_map)} powiaty.\n") - return population_map - - -def get_polish_wojewodztwa() -> gpd.GeoDataFrame: - """Get Polish województwa (voivodeships). - - Returns: - GeoDataFrame with województwa boundaries. - """ - url = f"{POLSKA_GEOJSON_BASE}/wojewodztwa/wojewodztwa-min.geojson" - cache_path = CACHE_DIR / "polish_wojewodztwa.geojson" - return _download_github_geojson(url, cache_path) - - -def get_polish_powiaty() -> gpd.GeoDataFrame: - """Get Polish powiaty (counties), sorted by population descending. - - Returns: - GeoDataFrame with powiat boundaries and population. 
- """ - url = f"{POLSKA_GEOJSON_BASE}/powiaty/powiaty-min.geojson" - cache_path = CACHE_DIR / "polish_powiaty.geojson" - gdf = _download_github_geojson(url, cache_path) - - # Get population data from Wikidata - population_map = _get_powiaty_population() - - # Add population column - def get_population(nazwa: str) -> int: - """Match powiat name to population data.""" - if not nazwa: - return 0 - # Remove "powiat " prefix for matching - clean_name = nazwa.replace("powiat ", "").strip() - # Try direct match - if clean_name in population_map: - return population_map[clean_name] - # Try lowercase - name_lower = clean_name.lower() - for pop_name, pop in population_map.items(): - if pop_name.lower() == name_lower: - return pop - return 0 - - gdf["population"] = gdf["nazwa"].apply(get_population) - - # Sort by population descending - return gdf.sort_values("population", ascending=False).reset_index(drop=True) - - -def get_polish_gminy() -> gpd.GeoDataFrame: - """Get Polish gminy (municipalities) from OSM, sorted by area descending. - - Returns: - GeoDataFrame with gminy boundaries. 
- """ - cache_path = CACHE_DIR / "polish_gminy.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching gminy data from OSM (this may take a while)...\n") - # Polish gminy are admin_level=7 in OSM - query = """ - [out:json][timeout:300]; - area["ISO3166-1"="PL"]->.pl; - relation["boundary"="administrative"]["admin_level"="7"]["name"](area.pl); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - min_ring_coords = 4 - - for element in data.get("elements", []): - if element.get("type") != "relation": - continue - - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) - if not outer_rings: - continue - - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name}, - "geometry": _build_osiedla_geometry(outer_rings, inner_rings), - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - sys.stdout.write(f"Cached {len(features)} gminy.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - # Add area column - gdf = _add_area_column(gdf) - - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - - -def get_poland_boundary() -> gpd.GeoDataFrame: - """Get Poland country boundary. - - Returns: - GeoDataFrame with Poland boundary. 
- """ - cache_path = CACHE_DIR / "poland_boundary.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - # Dissolve from województwa - woj = get_polish_wojewodztwa() - # Fix invalid geometries with buffer(0) - woj["geometry"] = woj["geometry"].buffer(0) - poland = gpd.GeoDataFrame(geometry=[woj.union_all()], crs=woj.crs) - - _ensure_cache_dir() - poland.to_file(cache_path, driver="GeoJSON") - - return poland diff --git a/python_pkg/geo_data/_poland_nature.py b/python_pkg/geo_data/_poland_nature.py deleted file mode 100644 index 222ed0b..0000000 --- a/python_pkg/geo_data/_poland_nature.py +++ /dev/null @@ -1,446 +0,0 @@ -"""Polish natural land features. - -Functions for downloading and caching data about Polish mountains, -national parks, forests, nature reserves, and landscape parks. -""" - -from __future__ import annotations - -import contextlib -import json -import sys -from typing import TYPE_CHECKING - -import geopandas as gpd - -from python_pkg.geo_data._common import ( - CACHE_DIR, - MIN_PEAK_ELEVATION, - _add_area_column, - _build_osiedla_geometry, - _ensure_cache_dir, - _extract_osiedla_rings, - _extract_polygon_from_element, - _extract_polygonal_geometry, - _overpass_query, -) - -if TYPE_CHECKING: - from typing import Any - - -def get_polish_mountain_peaks() -> gpd.GeoDataFrame: - """Get Polish mountain peaks, sorted by elevation descending. - - Returns: - GeoDataFrame with mountain peak points and elevation. 
- """ - cache_path = CACHE_DIR / "polish_mountain_peaks.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - return gdf.sort_values("elevation", ascending=False).reset_index(drop=True) - - sys.stdout.write("Fetching mountain peaks data from OSM...\n") - query = """ - [out:json][timeout:120]; - area["ISO3166-1"="PL"]->.pl; - ( - node["natural"="peak"]["name"]["ele"](area.pl); - ); - out; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - if element.get("type") != "node": - continue - - name = element.get("tags", {}).get("name", "") - ele_str = element.get("tags", {}).get("ele", "") - - if not name or not ele_str or name in seen_names: - continue - - with contextlib.suppress(ValueError): - elevation = float(ele_str.replace(",", ".").split()[0]) - if elevation < MIN_PEAK_ELEVATION: - continue - - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name, "elevation": elevation}, - "geometry": { - "type": "Point", - "coordinates": [element["lon"], element["lat"]], - }, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} mountain peaks.\n") - - if not features: - msg = "No mountain peaks found in OSM data" - raise ValueError(msg) - - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - return gdf.sort_values("elevation", ascending=False).reset_index(drop=True) - - -def get_polish_mountain_ranges() -> gpd.GeoDataFrame: - """Get Polish mountain ranges, sorted by area descending. - - Returns: - GeoDataFrame with mountain range polygons. 
- """ - cache_path = CACHE_DIR / "polish_mountain_ranges.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - # Fix invalid geometries from OSM data and extract only polygons - gdf["geometry"] = gdf.geometry.make_valid() - gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) - gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching mountain ranges data from OSM...\n") - query = """ - [out:json][timeout:180]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["natural"="mountain_range"]["name"](area.pl); - way["natural"="mountain_range"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features: list[dict[str, Any]] = [] - seen_names: set[str] = set() - min_ring_coords = 4 - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - if element.get("type") == "relation": - outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) - if not outer_rings: - continue - geometry = _build_osiedla_geometry(outer_rings, inner_rings) - elif element.get("type") == "way" and "geometry" in element: - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) < min_ring_coords: - continue - if coords[0] != coords[-1]: - coords.append(coords[0]) - geometry = {"type": "Polygon", "coordinates": [coords]} - else: - continue - - seen_names.add(name) - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} mountain ranges.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - # Fix invalid geometries from 
OSM data and extract only polygons - gdf["geometry"] = gdf.geometry.make_valid() - gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) - gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] - - # Calculate area in km² - gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system - gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 - - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - - -def get_polish_national_parks() -> gpd.GeoDataFrame: - """Get all 23 Polish national parks, sorted by area descending. - - Returns: - GeoDataFrame with national park polygons. - """ - cache_path = CACHE_DIR / "polish_national_parks.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching national parks data from OSM...\n") - query = """ - [out:json][timeout:180]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["boundary"="national_park"]["name"](area.pl); - relation["leisure"="nature_reserve"]["name"]["protect_class"="2"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - min_ring_coords = 4 - - for element in data.get("elements", []): - if element.get("type") != "relation": - continue - - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - # Filter to only include "Park Narodowy" in name - if "Narodowy" not in name and "narodowy" not in name.lower(): - continue - - outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) - if not outer_rings: - continue - - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name}, - "geometry": _build_osiedla_geometry(outer_rings, inner_rings), - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - 
cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} national parks.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - # Calculate area in km² - gdf_proj = gdf.to_crs("EPSG:2180") - gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 - - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - - -def get_polish_forests() -> gpd.GeoDataFrame: - """Get major Polish forests (puszcze), sorted by area descending. - - Returns: - GeoDataFrame with forest polygons. - """ - cache_path = CACHE_DIR / "polish_forests.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching forests data from OSM...\n") - # Query for named forests, especially "Puszcza" type - query = """ - [out:json][timeout:300]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["natural"="wood"]["name"](area.pl); - relation["landuse"="forest"]["name"~"Puszcza|Bory|Las"](area.pl); - way["natural"="wood"]["name"~"Puszcza|Bory"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - forest_keywords = ("Puszcza", "Bory", "Las ", "Lasy ") - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - if not any(keyword in name for keyword in forest_keywords): - continue - - geometry = _extract_polygon_from_element(element) - if geometry is None: - continue - - seen_names.add(name) - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} forests.\n") - gdf = 
gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - gdf = _add_area_column(gdf) - - if len(gdf) > 0: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - -def get_polish_nature_reserves() -> gpd.GeoDataFrame: - """Get Polish nature reserves, sorted by area descending. - - Returns: - GeoDataFrame with nature reserve polygons. - """ - cache_path = CACHE_DIR / "polish_nature_reserves.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write( - "Fetching nature reserves data from OSM (this may take a while)...\n" - ) - query = """ - [out:json][timeout:600]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["leisure"="nature_reserve"]["name"](area.pl); - way["leisure"="nature_reserve"]["name"](area.pl); - relation["boundary"="protected_area"]["protect_class"="4"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - geometry = _extract_polygon_from_element(element) - if geometry is None: - continue - - seen_names.add(name) - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} nature reserves.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - gdf = _add_area_column(gdf) - - if len(gdf) > 0: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - -def get_polish_landscape_parks() -> gpd.GeoDataFrame: - """Get Polish landscape parks, sorted by area descending. 
- - Returns: - GeoDataFrame with landscape park polygons. - """ - cache_path = CACHE_DIR / "polish_landscape_parks.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - # Fix invalid geometries from OSM data and extract only polygons - gdf["geometry"] = gdf.geometry.make_valid() - gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) - # Remove any rows where geometry extraction failed - gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching landscape parks data from OSM...\n") - query = """ - [out:json][timeout:300]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["boundary"="protected_area"]["protect_class"="5"]["name"](area.pl); - relation["leisure"="nature_reserve"]["name"~"Park Krajobrazowy"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - min_ring_coords = 4 - - for element in data.get("elements", []): - if element.get("type") != "relation": - continue - - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) - if not outer_rings: - continue - - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name}, - "geometry": _build_osiedla_geometry(outer_rings, inner_rings), - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} landscape parks.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - # Fix invalid geometries from OSM data and extract only polygons - gdf["geometry"] = gdf.geometry.make_valid() - gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) - # Remove any rows 
where geometry extraction failed - gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] - - if len(gdf) > 0: - gdf_proj = gdf.to_crs("EPSG:2180") - gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - - return gdf diff --git a/python_pkg/geo_data/_poland_water.py b/python_pkg/geo_data/_poland_water.py deleted file mode 100644 index 76d2807..0000000 --- a/python_pkg/geo_data/_poland_water.py +++ /dev/null @@ -1,437 +0,0 @@ -"""Polish water features and cultural sites. - -Functions for downloading and caching data about Polish lakes, rivers, -islands, coastal features, and UNESCO World Heritage sites. -""" - -from __future__ import annotations - -import json -import sys -from typing import TYPE_CHECKING - -import geopandas as gpd - -from python_pkg.geo_data._common import ( - CACHE_DIR, - MIN_LAKE_AREA_KM2, - MIN_LINE_COORDS, - MIN_RING_COORDS, - MIN_RIVER_LENGTH_KM, - _add_area_column, - _add_length_column, - _build_osiedla_geometry, - _ensure_cache_dir, - _extract_osiedla_rings, - _extract_polygon_from_element, - _overpass_query, -) - -if TYPE_CHECKING: - from typing import Any - - -def _extract_coastal_geometry( - element: dict[str, Any], - natural_type: str, - line_types: tuple[str, ...], -) -> dict[str, Any] | None: - """Extract geometry from a coastal feature element. - - For cliffs and beaches, returns LineString. For others, returns Polygon. - - Args: - element: OSM element. - natural_type: The natural= tag value. - line_types: Tuple of natural types that should be lines. - - Returns: - GeoJSON geometry dict, or None if extraction fails. 
- """ - if element.get("type") == "relation": - return _extract_polygon_from_element(element) - - if element.get("type") != "way" or "geometry" not in element: - return None - - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) < MIN_LINE_COORDS: - return None - - # For cliffs and beaches, keep as linestring - if natural_type in line_types: - return {"type": "LineString", "coordinates": coords} - - # Otherwise try to make a polygon - if len(coords) >= MIN_RING_COORDS: - if coords[0] != coords[-1]: - coords.append(coords[0]) - return {"type": "Polygon", "coordinates": [coords]} - - return None - - -def _extract_river_coords_from_element( - element: dict[str, Any], -) -> list[list[tuple[float, float]]]: - """Extract coordinate lists from a river element. - - Args: - element: OSM element (way or relation). - - Returns: - List of coordinate lists (line segments). - """ - coord_lists: list[list[tuple[float, float]]] = [] - - if element.get("type") == "way" and "geometry" in element: - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) >= MIN_LINE_COORDS: - coord_lists.append(coords) - elif element.get("type") == "relation": - for member in element.get("members", []): - if member.get("type") == "way" and "geometry" in member: - coords = [(p["lon"], p["lat"]) for p in member["geometry"]] - if len(coords) >= MIN_LINE_COORDS: - coord_lists.append(coords) - - return coord_lists - - -def get_polish_lakes() -> gpd.GeoDataFrame: - """Get Polish lakes, sorted by area descending. - - Returns: - GeoDataFrame with lake polygons. 
- """ - cache_path = CACHE_DIR / "polish_lakes.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching lakes data from OSM...\n") - query = """ - [out:json][timeout:300]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["natural"="water"]["water"="lake"]["name"](area.pl); - way["natural"="water"]["water"="lake"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - geometry = _extract_polygon_from_element(element) - if geometry is None: - continue - - seen_names.add(name) - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} lakes.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - gdf = _add_area_column(gdf) - - if len(gdf) > 0: - # Filter to lakes > MIN_LAKE_AREA_KM2 to exclude tiny ponds - gdf = gdf[gdf["area_km2"] > MIN_LAKE_AREA_KM2] - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - - return gdf - - -def get_polish_rivers() -> gpd.GeoDataFrame: - """Get Polish rivers, sorted by length descending. - - Rivers with the same name but in different locations are kept separate - by using unique IDs from OSM when available. - - Returns: - GeoDataFrame with river linestrings. 
- """ - cache_path = CACHE_DIR / "polish_rivers.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "length_km" in gdf.columns: - return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching rivers data from OSM...\n") - query = """ - [out:json][timeout:300]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["waterway"="river"]["name"](area.pl); - way["waterway"="river"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - # Group ways by river name AND wikidata ID (or OSM ID for uniqueness) - # This prevents merging different rivers with the same name - rivers_by_key: dict[str, list[list[tuple[float, float]]]] = {} - river_names: dict[str, str] = {} # key -> display name - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name: - continue - - # Use wikidata ID if available, otherwise use element type+id - wikidata = element.get("tags", {}).get("wikidata", "") - if wikidata: - key = f"{name}_{wikidata}" - else: - # Fall back to element ID for grouping related ways - key = f"{name}_{element.get('type')}_{element.get('id')}" - - coord_lists = _extract_river_coords_from_element(element) - if coord_lists: - rivers_by_key.setdefault(key, []).extend(coord_lists) - river_names[key] = name - - features = [] - for key, coord_lists in rivers_by_key.items(): - name = river_names[key] - geometry: dict[str, Any] - if len(coord_lists) == 1: - geometry = {"type": "LineString", "coordinates": coord_lists[0]} - else: - geometry = {"type": "MultiLineString", "coordinates": coord_lists} - - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} rivers.\n") - gdf = 
gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - gdf = _add_length_column(gdf) - - if len(gdf) > 0: - gdf = gdf[gdf["length_km"] > MIN_RIVER_LENGTH_KM] - return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) - - return gdf - - -def get_polish_islands() -> gpd.GeoDataFrame: - """Get Polish islands, sorted by area descending. - - Returns: - GeoDataFrame with island polygons. - """ - cache_path = CACHE_DIR / "polish_islands.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "area_km2" in gdf.columns: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching islands data from OSM...\n") - query = """ - [out:json][timeout:180]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["place"="island"]["name"](area.pl); - way["place"="island"]["name"](area.pl); - relation["place"="islet"]["name"](area.pl); - way["place"="islet"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - geometry = _extract_polygon_from_element(element) - if geometry is None: - continue - - seen_names.add(name) - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} islands.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - gdf = _add_area_column(gdf) - - if len(gdf) > 0: - return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) - return gdf - - -def get_polish_coastal_features() -> gpd.GeoDataFrame: - """Get Polish coastal features (peninsulas, spits, cliffs), sorted by length. 
- - Returns: - GeoDataFrame with coastal feature geometries. - """ - cache_path = CACHE_DIR / "polish_coastal_features.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - if "length_km" in gdf.columns: - return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) - return gdf - - sys.stdout.write("Fetching coastal features data from OSM...\n") - query = """ - [out:json][timeout:180]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["natural"="peninsula"]["name"](area.pl); - way["natural"="peninsula"]["name"](area.pl); - relation["natural"="spit"]["name"](area.pl); - way["natural"="spit"]["name"](area.pl); - relation["natural"="cliff"]["name"](area.pl); - way["natural"="cliff"]["name"](area.pl); - relation["natural"="coastline"]["name"](area.pl); - way["natural"="beach"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - line_types = ("cliff", "beach", "coastline") - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - natural_type = element.get("tags", {}).get("natural", "") - if not name or name in seen_names: - continue - - geometry = _extract_coastal_geometry(element, natural_type, line_types) - if geometry is None: - continue - - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name, "type": natural_type}, - "geometry": geometry, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} coastal features.\n") - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - gdf = _add_length_column(gdf) - - if len(gdf) > 0: - return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) - return gdf - - -def get_polish_unesco_sites() -> gpd.GeoDataFrame: - """Get Polish UNESCO World Heritage Sites, sorted by 
inscription year. - - Returns: - GeoDataFrame with UNESCO site geometries. - """ - cache_path = CACHE_DIR / "polish_unesco_sites.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - sys.stdout.write("Fetching UNESCO sites data from OSM...\n") - query = """ - [out:json][timeout:180]; - area["ISO3166-1"="PL"]->.pl; - ( - relation["heritage"="world_heritage_site"]["name"](area.pl); - way["heritage"="world_heritage_site"]["name"](area.pl); - node["heritage"="world_heritage_site"]["name"](area.pl); - relation["heritage:operator"="whc"]["name"](area.pl); - way["heritage:operator"="whc"]["name"](area.pl); - node["heritage:operator"="whc"]["name"](area.pl); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - min_ring_coords = 4 - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - if element.get("type") == "node": - geometry: dict[str, Any] = { - "type": "Point", - "coordinates": [element["lon"], element["lat"]], - } - elif element.get("type") == "relation": - outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) - if not outer_rings: - continue - geometry = _build_osiedla_geometry(outer_rings, inner_rings) - elif element.get("type") == "way" and "geometry" in element: - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) < min_ring_coords: - continue - if coords[0] != coords[-1]: - coords.append(coords[0]) - geometry = {"type": "Polygon", "coordinates": [coords]} - else: - continue - - seen_names.add(name) - features.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geometry} - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) - - sys.stdout.write(f"Cached {len(features)} UNESCO sites.\n") - return 
gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") diff --git a/python_pkg/geo_data/_warsaw.py b/python_pkg/geo_data/_warsaw.py deleted file mode 100644 index 76c45a3..0000000 --- a/python_pkg/geo_data/_warsaw.py +++ /dev/null @@ -1,407 +0,0 @@ -"""Warsaw geographic data functions. - -Functions for downloading and caching Warsaw-specific geographic data: -boundaries, districts, Vistula river, bridges, metro stations, and osiedla. -""" - -from __future__ import annotations - -import json -import sys - -import geopandas as gpd -from shapely.geometry import LineString - -from python_pkg.geo_data._common import ( - _PKG_DIR, - CACHE_DIR, - _build_osiedla_geometry, - _ensure_cache_dir, - _extract_osiedla_rings, - _overpass_query, -) - - -def get_warsaw_boundary() -> gpd.GeoDataFrame: - """Get Warsaw city boundary. - - Returns: - GeoDataFrame with Warsaw boundary polygon. - """ - cache_path = CACHE_DIR / "warsaw_boundary.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - # Try to use districts file first - districts_path = ( - _PKG_DIR / "anki_decks" / "warsaw_districts" / "warszawa-dzielnice.geojson" - ) - if districts_path.exists(): - warsaw_gdf = gpd.read_file(districts_path) - warsaw_boundary = warsaw_gdf[warsaw_gdf["name"] == "Warszawa"] - if len(warsaw_boundary) == 0: - warsaw_boundary = gpd.GeoDataFrame( - geometry=[warsaw_gdf.union_all()], crs=warsaw_gdf.crs - ) - _ensure_cache_dir() - warsaw_boundary.to_file(cache_path, driver="GeoJSON") - return warsaw_boundary - - # Fallback to Overpass query - sys.stdout.write("Fetching Warsaw boundary from OpenStreetMap...\n") - query = """ - [out:json][timeout:60]; - relation["name"="Warszawa"]["admin_level"="6"]; - out geom; - """ - - data = _overpass_query(query) - - features = [] - for element in data.get("elements", []): - if element.get("type") == "relation": - coords = [] - for member in element.get("members", []): - if member.get("role") == "outer" and "geometry" in member: - 
coords.extend([(p["lon"], p["lat"]) for p in member["geometry"]]) - if coords: - features.append( - { - "type": "Feature", - "properties": {"name": "Warszawa"}, - "geometry": {"type": "Polygon", "coordinates": [coords]}, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - -def get_warsaw_districts() -> gpd.GeoDataFrame: - """Get Warsaw districts (dzielnice). - - Returns: - GeoDataFrame with district boundaries. - """ - districts_path = ( - _PKG_DIR / "anki_decks" / "warsaw_districts" / "warszawa-dzielnice.geojson" - ) - if districts_path.exists(): - gdf = gpd.read_file(districts_path) - return gdf[gdf["name"] != "Warszawa"].copy() - - msg = "Warsaw districts GeoJSON not found" - raise FileNotFoundError(msg) - - -def get_vistula_river() -> gpd.GeoDataFrame: - """Get Vistula river in Warsaw. - - Returns: - GeoDataFrame with river geometry. 
- """ - cache_path = CACHE_DIR / "warsaw_vistula.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - sys.stdout.write("Fetching Vistula river data...\n") - query = """ - [out:json][timeout:60]; - area["name"="Warszawa"]["admin_level"="6"]->.warsaw; - ( - way["waterway"="river"]["name"="Wisła"](area.warsaw); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - min_coords = 2 - for element in data.get("elements", []): - if element.get("type") == "way" and "geometry" in element: - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) >= min_coords: - features.append( - { - "type": "Feature", - "properties": {"name": "Wisła"}, - "geometry": {"type": "LineString", "coordinates": coords}, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - -def get_warsaw_bridges() -> gpd.GeoDataFrame: - """Get Warsaw bridges over the Vistula. - - Returns: - GeoDataFrame with bridge geometries. 
- """ - cache_path = CACHE_DIR / "warsaw_bridges.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - sys.stdout.write("Fetching Warsaw bridges data...\n") - - # First get the Vistula to filter bridges - vistula = get_vistula_river() - vistula_union = vistula.union_all() - vistula_buffer = vistula_union.buffer(0.002) # ~200m buffer - - # Query for bridges with "Most" in name - smaller query - query = """ - [out:json][timeout:90]; - area["name"="Warszawa"]["admin_level"="6"]->.warsaw; - way["bridge"="yes"]["name"~"^Most"](area.warsaw); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - min_coords = 2 - - for element in data.get("elements", []): - if element.get("type") != "way" or "geometry" not in element: - continue - - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) < min_coords: - continue - - line = LineString(coords) - - # Check if bridge crosses/is near Vistula - if line.intersects(vistula_buffer): - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name, "osm_id": element.get("id")}, - "geometry": {"type": "LineString", "coordinates": coords}, - } - ) - - # Merge segments of the same bridge - merged_features = _merge_bridge_segments(features) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": merged_features} - cache_path.write_text(json.dumps(geojson)) - - sys.stdout.write(f"Cached {len(merged_features)} bridges.\n") - return gpd.GeoDataFrame.from_features(merged_features, crs="EPSG:4326") - - -def _merge_bridge_segments(features: list[dict]) -> list[dict]: - """Merge bridge segments with the same name. - - Args: - features: List of GeoJSON features. - - Returns: - List of merged features. 
- """ - by_name: dict[str, list[list[tuple[float, float]]]] = {} - - for feature in features: - name = feature["properties"]["name"] - coords = feature["geometry"]["coordinates"] - if name not in by_name: - by_name[name] = [] - by_name[name].append(coords) - - merged = [] - for name, coord_lists in by_name.items(): - if len(coord_lists) == 1: - geom = {"type": "LineString", "coordinates": coord_lists[0]} - else: - geom = {"type": "MultiLineString", "coordinates": coord_lists} - - merged.append( - {"type": "Feature", "properties": {"name": name}, "geometry": geom} - ) - - return merged - - -def get_warsaw_metro_stations() -> gpd.GeoDataFrame: - """Get Warsaw metro stations with line information. - - Returns: - GeoDataFrame with station points and line info (M1, M2, or M1/M2). - """ - cache_path = CACHE_DIR / "warsaw_metro.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - # Known stations for each line (as of 2024) - m1_stations = { - "Kabaty", - "Natolin", - "Imielin", - "Stokłosy", - "Ursynów", - "Służew", - "Wilanowska", - "Wierzbno", - "Racławicka", - "Pole Mokotowskie", - "Politechnika", - "Centrum", - "Świętokrzyska", # Also M2 - "Ratusz-Arsenał", - "Dworzec Gdański", - "Plac Wilsona", - "Marymont", - "Słodowiec", - "Stare Bielany", - "Wawrzyszew", - "Młociny", - } - m2_stations = { - "Bródno", - "Kondratowicza", - "Zacisze", - "Targówek Mieszkaniowy", - "Trocka", - "Szwedzka", - "Dworzec Wileński", - "Świętokrzyska", # Also M1 - "Nowy Świat-Uniwersytet", - "Centrum Nauki Kopernik", - "Stadion Narodowy", - "Rondo ONZ", - "Rondo Daszyńskiego", - "Płocka", - "Młynów", - "Księcia Janusza", - "Ulrychów", - "Bemowo", - } - - sys.stdout.write("Fetching metro station data...\n") - query = """ - [out:json][timeout:60]; - area["name"="Warszawa"]["admin_level"="6"]->.warsaw; - ( - node["railway"="station"]["station"="subway"](area.warsaw); - node["railway"="station"]["network"~"Metro"](area.warsaw); - ); - out body; - """ - - data = 
_overpass_query(query) - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - if element.get("type") == "node": - name = element.get("tags", {}).get("name", "") - if name and name not in seen_names: - seen_names.add(name) - # Determine line from known station lists - in_m1 = name in m1_stations - in_m2 = name in m2_stations - if in_m1 and in_m2: - line = "M1/M2" - elif in_m1: - line = "M1" - elif in_m2: - line = "M2" - else: - line = "?" # Unknown station - - features.append( - { - "type": "Feature", - "properties": { - "name": name, - "line": line, - }, - "geometry": { - "type": "Point", - "coordinates": [element["lon"], element["lat"]], - }, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - sys.stdout.write(f"Cached {len(features)} metro stations.\n") - return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - - -def get_warsaw_osiedla() -> gpd.GeoDataFrame: - """Get Warsaw osiedla (neighborhoods). - - Returns: - GeoDataFrame with osiedla boundaries. 
- """ - cache_path = CACHE_DIR / "warsaw_osiedla.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - sys.stdout.write("Fetching osiedla data...\n") - query = """ - [out:json][timeout:180]; - area["name"="Warszawa"]["admin_level"="6"]->.warsaw; - relation["boundary"="administrative"]["admin_level"="11"]["name"](area.warsaw); - out geom; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - min_ring_coords = 4 - - for element in data.get("elements", []): - if element.get("type") != "relation": - continue - - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) - if not outer_rings: - continue - - seen_names.add(name) - features.append( - { - "type": "Feature", - "properties": {"name": name}, - "geometry": _build_osiedla_geometry(outer_rings, inner_rings), - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - sys.stdout.write(f"Cached {len(features)} osiedla.\n") - return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") diff --git a/python_pkg/geo_data/_warsaw_places.py b/python_pkg/geo_data/_warsaw_places.py deleted file mode 100644 index 0bc58f2..0000000 --- a/python_pkg/geo_data/_warsaw_places.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Warsaw streets, landmarks, and place data. - -Functions for downloading and caching Warsaw streets, landmarks, -and other place-related geographic data. -""" - -from __future__ import annotations - -import json -import sys - -import geopandas as gpd -from shapely.geometry import MultiLineString - -from python_pkg.geo_data._common import CACHE_DIR, _ensure_cache_dir, _overpass_query - - -def get_warsaw_streets(min_length: int = 500) -> gpd.GeoDataFrame: - """Get major Warsaw streets. - - Args: - min_length: Minimum street length in meters. 
- - Returns: - GeoDataFrame with street geometries. - """ - cache_path = CACHE_DIR / "warsaw_streets.geojson" - - if cache_path.exists(): - gdf = gpd.read_file(cache_path) - # Filter by length if needed - return _filter_streets_by_length(gdf, min_length) - - sys.stdout.write("Fetching street data from OpenStreetMap...\n") - query = """ - [out:json][timeout:120]; - area["name"="Warszawa"]["admin_level"="6"]->.warsaw; - ( - way["highway"="primary"]["name"](area.warsaw); - way["highway"="secondary"]["name"](area.warsaw); - way["highway"="tertiary"]["name"](area.warsaw); - ); - out geom; - """ - - data = _overpass_query(query) - - features = [] - min_coords = 2 - - for element in data.get("elements", []): - if element.get("type") == "way" and "geometry" in element: - coords = [(p["lon"], p["lat"]) for p in element["geometry"]] - if len(coords) >= min_coords: - features.append( - { - "type": "Feature", - "properties": { - "name": element.get("tags", {}).get("name", "Unknown"), - "highway": element.get("tags", {}).get("highway", ""), - }, - "geometry": {"type": "LineString", "coordinates": coords}, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - sys.stdout.write(f"Cached {len(features)} street segments.\n") - - gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") - return _filter_streets_by_length(gdf, min_length) - - -def _filter_streets_by_length( - gdf: gpd.GeoDataFrame, min_length: int -) -> gpd.GeoDataFrame: - """Filter and merge streets by name, keeping only those above min_length. - - Args: - gdf: GeoDataFrame with street segments. - min_length: Minimum length in meters. - - Returns: - GeoDataFrame with merged streets, sorted by length (longest first). 
- """ - # Group by street name - streets: dict[str, list] = {} - for _, row in gdf.iterrows(): - name = row.get("name", "Unknown") - if name and name != "Unknown": - if name not in streets: - streets[name] = [] - streets[name].append(row.geometry) - - # Merge and filter - result_rows = [] - for name, geometries in streets.items(): - merged = geometries[0] if len(geometries) == 1 else MultiLineString(geometries) - - # Create temp GeoDataFrame for length calculation - temp_gdf = gpd.GeoDataFrame(geometry=[merged], crs="EPSG:4326") - temp_proj = temp_gdf.to_crs("EPSG:2180") # Polish coordinate system - length = temp_proj.geometry.length.iloc[0] - - if length >= min_length: - result_rows.append({"name": name, "geometry": merged, "length_m": length}) - - # Sort by length (longest first) - result_rows.sort(key=lambda x: x["length_m"], reverse=True) - - return gpd.GeoDataFrame( - result_rows, - crs="EPSG:4326" if result_rows else None, - ) - - -def get_warsaw_landmarks() -> gpd.GeoDataFrame: - """Get Warsaw landmarks (museums, monuments, parks, etc.). - - Returns: - GeoDataFrame with landmark points. 
- """ - cache_path = CACHE_DIR / "warsaw_landmarks.geojson" - - if cache_path.exists(): - return gpd.read_file(cache_path) - - sys.stdout.write("Fetching landmark data...\n") - # Simplified query - just museums and major attractions - query = """ - [out:json][timeout:60]; - area["name"="Warszawa"]["admin_level"="6"]->.warsaw; - ( - node["tourism"="museum"]["name"](area.warsaw); - node["tourism"="attraction"]["name"](area.warsaw); - node["historic"="monument"]["name"](area.warsaw); - way["tourism"="museum"]["name"](area.warsaw); - way["tourism"="attraction"]["name"](area.warsaw); - ); - out center; - """ - - data = _overpass_query(query) - - features = [] - seen_names: set[str] = set() - - for element in data.get("elements", []): - name = element.get("tags", {}).get("name", "") - if not name or name in seen_names: - continue - - # Get coordinates - if element.get("type") == "node": - lon, lat = element["lon"], element["lat"] - elif "center" in element: - lon, lat = element["center"]["lon"], element["center"]["lat"] - else: - continue - - seen_names.add(name) - landmark_type = ( - element.get("tags", {}).get("tourism") - or element.get("tags", {}).get("historic") - or element.get("tags", {}).get("leisure") - or "landmark" - ) - - features.append( - { - "type": "Feature", - "properties": {"name": name, "type": landmark_type}, - "geometry": {"type": "Point", "coordinates": [lon, lat]}, - } - ) - - _ensure_cache_dir() - geojson = {"type": "FeatureCollection", "features": features} - cache_path.write_text(json.dumps(geojson)) - - sys.stdout.write(f"Cached {len(features)} landmarks.\n") - - if not features: - return gpd.GeoDataFrame( - {"name": [], "type": [], "geometry": []}, crs="EPSG:4326" - ) - return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") diff --git a/python_pkg/geo_data/tests/__init__.py b/python_pkg/geo_data/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/python_pkg/geo_data/tests/test_common.py 
b/python_pkg/geo_data/tests/test_common.py deleted file mode 100644 index 8f3f36c..0000000 --- a/python_pkg/geo_data/tests/test_common.py +++ /dev/null @@ -1,487 +0,0 @@ -"""Tests for python_pkg.geo_data._common module.""" - -from __future__ import annotations - -from pathlib import Path -from typing import Any -from unittest.mock import MagicMock, patch - -import pytest -from shapely.geometry import ( - GeometryCollection, - LineString, - MultiPolygon, - Point, - Polygon, -) - -from python_pkg.geo_data._common import ( - _build_osiedla_geometry, - _download_github_geojson, - _ensure_cache_dir, - _extract_line_from_way, - _extract_osiedla_rings, - _extract_polygon_from_element, - _extract_polygonal_geometry, - _overpass_query, - _try_single_request, -) - - -class TestEnsureCacheDir: - """Tests for _ensure_cache_dir.""" - - def test_creates_directory(self) -> None: - with patch.object(Path, "mkdir") as mock_mkdir: - _ensure_cache_dir() - mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) - - -class TestExtractPolygonalGeometry: - """Tests for _extract_polygonal_geometry.""" - - def test_polygon_returned_directly(self) -> None: - poly = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) - result = _extract_polygonal_geometry(poly) - assert result is poly - - def test_multipolygon_returned_directly(self) -> None: - mp = MultiPolygon( - [ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]), - ] - ) - result = _extract_polygonal_geometry(mp) - assert result is mp - - def test_geometry_collection_single_polygon(self) -> None: - poly = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) - gc = GeometryCollection([poly, LineString([(0, 0), (1, 1)])]) - result = _extract_polygonal_geometry(gc) - assert result is not None - assert result.equals(poly) - - def test_geometry_collection_multiple_polygons(self) -> None: - p1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) - p2 = Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]) - gc = GeometryCollection([p1, 
p2, LineString([(0, 0), (1, 1)])]) - result = _extract_polygonal_geometry(gc) - assert isinstance(result, MultiPolygon) - - def test_geometry_collection_with_multipolygon(self) -> None: - p1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) - mp = MultiPolygon( - [ - Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]), - Polygon([(4, 4), (5, 4), (5, 5), (4, 5)]), - ] - ) - gc = GeometryCollection([p1, mp]) - result = _extract_polygonal_geometry(gc) - assert isinstance(result, MultiPolygon) - - def test_geometry_collection_no_polygons(self) -> None: - gc = GeometryCollection([LineString([(0, 0), (1, 1)])]) - result = _extract_polygonal_geometry(gc) - assert result is None - - def test_unsupported_geometry_type(self) -> None: - point = Point(0, 0) - result = _extract_polygonal_geometry(point) - assert result is None - - -class TestTrySingleRequest: - """Tests for _try_single_request.""" - - @patch("python_pkg.geo_data._common.requests.post") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_successful_request( - self, mock_stdout: MagicMock, mock_post: MagicMock - ) -> None: - mock_response = MagicMock() - mock_response.json.return_value = {"elements": []} - mock_post.return_value = mock_response - - result, error = _try_single_request("http://example.com", "query") - assert result == {"elements": []} - assert error is None - - @patch("python_pkg.geo_data._common.requests.post") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_request_exception( - self, mock_stdout: MagicMock, mock_post: MagicMock - ) -> None: - import requests - - mock_post.side_effect = requests.RequestException("fail") - result, error = _try_single_request("http://example.com", "query") - assert result is None - assert isinstance(error, requests.RequestException) - - @patch("python_pkg.geo_data._common.requests.post") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_invalid_response_format( - self, mock_stdout: MagicMock, mock_post: MagicMock - ) -> None: - mock_response = 
MagicMock() - mock_response.json.return_value = {"no_elements": True} - mock_post.return_value = mock_response - - result, error = _try_single_request("http://example.com", "query") - assert result is None - assert isinstance(error, ValueError) - - @patch("python_pkg.geo_data._common.requests.post") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_non_dict_response( - self, mock_stdout: MagicMock, mock_post: MagicMock - ) -> None: - mock_response = MagicMock() - mock_response.json.return_value = [1, 2, 3] - mock_post.return_value = mock_response - - result, error = _try_single_request("http://example.com", "query") - assert result is None - assert isinstance(error, ValueError) - - @patch("python_pkg.geo_data._common.requests.post") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_value_error_on_json_parse( - self, mock_stdout: MagicMock, mock_post: MagicMock - ) -> None: - mock_response = MagicMock() - mock_response.json.side_effect = ValueError("bad json") - mock_post.return_value = mock_response - - result, error = _try_single_request("http://example.com", "query") - assert result is None - assert isinstance(error, ValueError) - - @patch("python_pkg.geo_data._common.requests.post") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_timeout_error(self, mock_stdout: MagicMock, mock_post: MagicMock) -> None: - import requests - - mock_post.side_effect = requests.Timeout("timeout") - result, error = _try_single_request("http://example.com", "query") - assert result is None - assert isinstance(error, requests.Timeout) - - -class TestOverpassQuery: - """Tests for _overpass_query.""" - - @patch("python_pkg.geo_data._common._try_single_request") - def test_success_on_first_try(self, mock_req: MagicMock) -> None: - mock_req.return_value = ({"elements": []}, None) - result = _overpass_query("query") - assert result == {"elements": []} - - @patch("python_pkg.geo_data._common.time.sleep") - 
@patch("python_pkg.geo_data._common._try_single_request") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_retries_then_succeeds( - self, mock_stdout: MagicMock, mock_req: MagicMock, mock_sleep: MagicMock - ) -> None: - mock_req.side_effect = [ - (None, ValueError("fail1")), - ({"elements": []}, None), - ] - result = _overpass_query("query") - assert result == {"elements": []} - - @patch("python_pkg.geo_data._common.time.sleep") - @patch("python_pkg.geo_data._common._try_single_request") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_all_endpoints_fail( - self, mock_stdout: MagicMock, mock_req: MagicMock, mock_sleep: MagicMock - ) -> None: - mock_req.return_value = (None, ValueError("fail")) - with pytest.raises(RuntimeError, match="All Overpass API endpoints failed"): - _overpass_query("query") - - -class TestDownloadGithubGeojson: - """Tests for _download_github_geojson.""" - - @patch("python_pkg.geo_data._common.gpd.read_file") - def test_cached_file_exists(self, mock_read: MagicMock) -> None: - mock_gdf = MagicMock() - mock_read.return_value = mock_gdf - cache_path = MagicMock() - cache_path.exists.return_value = True - - result = _download_github_geojson("http://example.com/data.geojson", cache_path) - assert result is mock_gdf - mock_read.assert_called_once_with(cache_path) - - @patch("python_pkg.geo_data._common.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._common._ensure_cache_dir") - @patch("python_pkg.geo_data._common.requests.get") - @patch("python_pkg.geo_data._common.sys.stdout") - def test_downloads_and_caches( - self, - mock_stdout: MagicMock, - mock_get: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - features_data: dict[str, Any] = { - "features": [ - { - "type": "Feature", - "properties": {"name": "test"}, - "geometry": {"type": "Point", "coordinates": [0, 0]}, - } - ] - } - mock_response = MagicMock() - mock_response.json.return_value = features_data - 
mock_get.return_value = mock_response - - mock_gdf = MagicMock() - mock_from_features.return_value = mock_gdf - - cache_path = MagicMock() - cache_path.exists.return_value = False - - result = _download_github_geojson( - "https://example.com/data.geojson", cache_path - ) - assert result is mock_gdf - - def test_unsupported_url_scheme(self) -> None: - cache_path = MagicMock() - cache_path.exists.return_value = False - with pytest.raises(ValueError, match="Unsupported URL scheme"): - _download_github_geojson("ftp://example.com/data", cache_path) - - -class TestExtractOsiedlaRings: - """Tests for _extract_osiedla_rings.""" - - def test_outer_and_inner_rings(self) -> None: - element: dict[str, Any] = { - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - { - "role": "inner", - "geometry": [ - {"lon": 0.2, "lat": 0.2}, - {"lon": 0.4, "lat": 0.2}, - {"lon": 0.4, "lat": 0.4}, - {"lon": 0.2, "lat": 0.4}, - ], - }, - ] - } - outer, inner = _extract_osiedla_rings(element, 4) - assert len(outer) == 1 - assert len(inner) == 1 - - def test_ring_too_short(self) -> None: - element: dict[str, Any] = { - "members": [ - { - "role": "outer", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 0}], - } - ] - } - outer, inner = _extract_osiedla_rings(element, 4) - assert len(outer) == 0 - assert len(inner) == 0 - - def test_no_geometry_in_member(self) -> None: - element: dict[str, Any] = {"members": [{"role": "outer"}]} - outer, inner = _extract_osiedla_rings(element, 4) - assert len(outer) == 0 - assert len(inner) == 0 - - def test_already_closed_ring(self) -> None: - element: dict[str, Any] = { - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 0}, - ], - } - ] - } - outer, _ = _extract_osiedla_rings(element, 4) - assert len(outer) == 1 - # Already closed, so no extra point - 
assert outer[0][0] == outer[0][-1] - - def test_no_members(self) -> None: - element: dict[str, Any] = {} - outer, inner = _extract_osiedla_rings(element, 4) - assert len(outer) == 0 - assert len(inner) == 0 - - def test_unknown_role_ignored(self) -> None: - element: dict[str, Any] = { - "members": [ - { - "role": "label", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ] - } - outer, inner = _extract_osiedla_rings(element, 4) - assert len(outer) == 0 - assert len(inner) == 0 - - -class TestBuildOsiedlaGeometry: - """Tests for _build_osiedla_geometry.""" - - def test_single_outer_ring(self) -> None: - outer = [[(0, 0), (1, 0), (1, 1), (0, 0)]] - inner: list[list[tuple[float, float]]] = [] - result = _build_osiedla_geometry(outer, inner) - assert result["type"] == "Polygon" - - def test_single_outer_with_inner(self) -> None: - outer = [[(0, 0), (1, 0), (1, 1), (0, 0)]] - inner = [[(0.2, 0.2), (0.4, 0.2), (0.4, 0.4), (0.2, 0.2)]] - result = _build_osiedla_geometry(outer, inner) - assert result["type"] == "Polygon" - assert len(result["coordinates"]) == 2 - - def test_multiple_outer_rings(self) -> None: - outer = [ - [(0, 0), (1, 0), (1, 1), (0, 0)], - [(2, 2), (3, 2), (3, 3), (2, 2)], - ] - inner: list[list[tuple[float, float]]] = [] - result = _build_osiedla_geometry(outer, inner) - assert result["type"] == "MultiPolygon" - - -class TestExtractPolygonFromElement: - """Tests for _extract_polygon_from_element.""" - - def test_relation_with_rings(self) -> None: - element: dict[str, Any] = { - "type": "relation", - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ], - } - result = _extract_polygon_from_element(element) - assert result is not None - assert result["type"] == "Polygon" - - def test_relation_without_outer_rings(self) -> None: - element: dict[str, Any] = { - "type": "relation", 
- "members": [{"role": "inner", "geometry": [{"lon": 0, "lat": 0}]}], - } - result = _extract_polygon_from_element(element) - assert result is None - - def test_way_with_enough_coords(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - result = _extract_polygon_from_element(element) - assert result is not None - assert result["type"] == "Polygon" - # Should close the ring - assert result["coordinates"][0][0] == result["coordinates"][0][-1] - - def test_way_already_closed(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 0}, - ], - } - result = _extract_polygon_from_element(element) - assert result is not None - - def test_way_too_few_coords(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 0}], - } - result = _extract_polygon_from_element(element) - assert result is None - - def test_way_no_geometry(self) -> None: - element: dict[str, Any] = {"type": "way"} - result = _extract_polygon_from_element(element) - assert result is None - - def test_unknown_type(self) -> None: - element: dict[str, Any] = {"type": "node"} - result = _extract_polygon_from_element(element) - assert result is None - - -class TestExtractLineFromWay: - """Tests for _extract_line_from_way.""" - - def test_valid_way(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - } - result = _extract_line_from_way(element) - assert result is not None - assert result["type"] == "LineString" - - def test_too_few_coords(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}], - } - result = _extract_line_from_way(element) - assert result is None - - def test_not_a_way(self) -> None: - 
element: dict[str, Any] = {"type": "node"} - result = _extract_line_from_way(element) - assert result is None - - def test_way_no_geometry(self) -> None: - element: dict[str, Any] = {"type": "way"} - result = _extract_line_from_way(element) - assert result is None diff --git a/python_pkg/geo_data/tests/test_common_part2.py b/python_pkg/geo_data/tests/test_common_part2.py deleted file mode 100644 index 4694e64..0000000 --- a/python_pkg/geo_data/tests/test_common_part2.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Tests for _add_area_column and _add_length_column (non-empty GDFs).""" - -from __future__ import annotations - -import geopandas as gpd -from shapely.geometry import LineString, Polygon - -from python_pkg.geo_data._common import _add_area_column, _add_length_column - - -class TestAddAreaColumnNonEmpty: - """Tests for _add_area_column with non-empty GeoDataFrame.""" - - def test_adds_area_column(self) -> None: - gdf = gpd.GeoDataFrame( - {"name": ["A"]}, - geometry=[Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])], - crs="EPSG:4326", - ) - result = _add_area_column(gdf) - assert "area_km2" in result.columns - assert result["area_km2"].iloc[0] > 0 - - -class TestAddLengthColumnNonEmpty: - """Tests for _add_length_column with non-empty GeoDataFrame.""" - - def test_adds_length_column(self) -> None: - gdf = gpd.GeoDataFrame( - {"name": ["A"]}, - geometry=[LineString([(20, 50), (21, 51)])], - crs="EPSG:4326", - ) - result = _add_length_column(gdf) - assert "length_km" in result.columns - assert result["length_km"].iloc[0] > 0 - - -class TestAddAreaColumnEmpty: - """Tests for _add_area_column with empty GeoDataFrame.""" - - def test_returns_empty_gdf(self) -> None: - gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - result = _add_area_column(gdf) - assert len(result) == 0 - - -class TestAddLengthColumnEmpty: - """Tests for _add_length_column with empty GeoDataFrame.""" - - def test_returns_empty_gdf(self) -> None: - gdf = gpd.GeoDataFrame({"name": [], "geometry": 
[]}) - result = _add_length_column(gdf) - assert len(result) == 0 diff --git a/python_pkg/geo_data/tests/test_init.py b/python_pkg/geo_data/tests/test_init.py deleted file mode 100644 index 16132f7..0000000 --- a/python_pkg/geo_data/tests/test_init.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Tests for python_pkg.geo_data.__init__ module.""" - -from __future__ import annotations - -from unittest.mock import MagicMock, patch - -from python_pkg.geo_data import ( - clear_cache, - download_all_poland_data, - download_all_warsaw_data, -) - - -class TestDownloadAllWarsawData: - """Tests for download_all_warsaw_data.""" - - def test_calls_all_warsaw_functions(self) -> None: - with ( - patch("python_pkg.geo_data.sys.stdout"), - patch("python_pkg.geo_data.get_warsaw_boundary") as mock_boundary, - patch("python_pkg.geo_data.get_vistula_river") as mock_vistula, - patch("python_pkg.geo_data.get_warsaw_bridges") as mock_bridges, - patch("python_pkg.geo_data.get_warsaw_metro_stations") as mock_metro, - patch("python_pkg.geo_data.get_warsaw_streets") as mock_streets, - patch("python_pkg.geo_data.get_warsaw_landmarks") as mock_landmarks, - patch("python_pkg.geo_data.get_warsaw_osiedla") as mock_osiedla, - ): - download_all_warsaw_data() - mock_boundary.assert_called_once() - mock_vistula.assert_called_once() - mock_bridges.assert_called_once() - mock_metro.assert_called_once() - mock_streets.assert_called_once() - mock_landmarks.assert_called_once() - mock_osiedla.assert_called_once() - - -class TestDownloadAllPolandData: - """Tests for download_all_poland_data.""" - - @patch("python_pkg.geo_data.get_poland_boundary") - @patch("python_pkg.geo_data.get_polish_gminy") - @patch("python_pkg.geo_data.get_polish_powiaty") - @patch("python_pkg.geo_data.get_polish_wojewodztwa") - @patch("python_pkg.geo_data.sys.stdout") - def test_calls_all_poland_functions( - self, - mock_stdout: MagicMock, - mock_woj: MagicMock, - mock_powiaty: MagicMock, - mock_gminy: MagicMock, - mock_boundary: MagicMock, - 
) -> None: - download_all_poland_data() - mock_woj.assert_called_once() - mock_powiaty.assert_called_once() - mock_gminy.assert_called_once() - mock_boundary.assert_called_once() - - -class TestClearCache: - """Tests for clear_cache.""" - - @patch("python_pkg.geo_data.shutil.rmtree") - @patch("python_pkg.geo_data.CACHE_DIR") - @patch("python_pkg.geo_data.sys.stdout") - def test_cache_exists( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_rmtree: MagicMock, - ) -> None: - mock_cache_dir.exists.return_value = True - clear_cache() - mock_rmtree.assert_called_once_with(mock_cache_dir) - - @patch("python_pkg.geo_data.CACHE_DIR") - @patch("python_pkg.geo_data.sys.stdout") - def test_cache_not_exists( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - ) -> None: - mock_cache_dir.exists.return_value = False - clear_cache() diff --git a/python_pkg/geo_data/tests/test_poland_admin.py b/python_pkg/geo_data/tests/test_poland_admin.py deleted file mode 100644 index 78309ab..0000000 --- a/python_pkg/geo_data/tests/test_poland_admin.py +++ /dev/null @@ -1,313 +0,0 @@ -"""Tests for python_pkg.geo_data._poland_admin module.""" - -from __future__ import annotations - -import json -from unittest.mock import MagicMock, patch - -import geopandas as gpd -from shapely.geometry import Polygon - -from python_pkg.geo_data._poland_admin import ( - _get_powiaty_population, - _query_wikidata, - get_poland_boundary, - get_polish_gminy, - get_polish_powiaty, - get_polish_wojewodztwa, -) - - -class TestQueryWikidata: - """Tests for _query_wikidata.""" - - @patch("python_pkg.geo_data._poland_admin.requests.get") - def test_successful_query(self, mock_get: MagicMock) -> None: - mock_response = MagicMock() - mock_response.json.return_value = { - "results": {"bindings": [{"name": {"value": "test"}}]} - } - mock_get.return_value = mock_response - - result = _query_wikidata("SELECT ?x WHERE {}") - assert result == [{"name": {"value": "test"}}] - 
mock_response.raise_for_status.assert_called_once() - - -class TestGetPowiatyPopulation: - """Tests for _get_powiaty_population.""" - - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_path.read_text.return_value = json.dumps({"Kraków": 780000}) - - result = _get_powiaty_population() - assert result == {"Kraków": 780000} - - @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_admin._query_wikidata") - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - @patch("python_pkg.geo_data._poland_admin.sys.stdout") - def test_downloads_and_caches( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = [ - { - "powiatLabel": {"value": "powiat krakowski"}, - "population": {"value": "100000"}, - }, - { - "powiatLabel": {"value": "powiat wadowicki"}, - "population": {"value": "bad_value"}, - }, - { - "powiatLabel": {"value": ""}, - "population": {"value": "50000"}, - }, - { - "population": {"value": "30000"}, - }, - ] - - result = _get_powiaty_population() - assert "krakowski" in result - mock_path.write_text.assert_called_once() - - @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_admin._query_wikidata") - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - @patch("python_pkg.geo_data._poland_admin.sys.stdout") - def test_empty_label_skipped( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = 
MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = [ - {"powiatLabel": {"value": ""}, "population": {"value": "1000"}}, - ] - - result = _get_powiaty_population() - assert len(result) == 0 - - -class TestGetPolishWojewodztwa: - """Tests for get_polish_wojewodztwa.""" - - @patch("python_pkg.geo_data._poland_admin._download_github_geojson") - def test_returns_geodataframe(self, mock_download: MagicMock) -> None: - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_download.return_value = mock_gdf - - result = get_polish_wojewodztwa() - assert result is mock_gdf - - -class TestGetPolishPowiaty: - """Tests for get_polish_powiaty.""" - - @patch("python_pkg.geo_data._poland_admin._get_powiaty_population") - @patch("python_pkg.geo_data._poland_admin._download_github_geojson") - def test_with_population( - self, mock_download: MagicMock, mock_pop: MagicMock - ) -> None: - gdf = gpd.GeoDataFrame( - {"nazwa": ["powiat krakowski", "powiat Wadowice", "powiat xyz", ""]}, - geometry=[ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - ], - crs="EPSG:4326", - ) - mock_download.return_value = gdf - mock_pop.return_value = {"krakowski": 100000, "wadowice": 50000} - - result = get_polish_powiaty() - assert "population" in result.columns - # krakowski matched directly - assert result.iloc[0]["population"] == 100000 - # Wadowice matched case-insensitively - assert result.iloc[1]["population"] == 50000 - - -class TestGetPolishGminy: - """Tests for get_polish_gminy.""" - - @patch("python_pkg.geo_data._poland_admin.gpd.read_file") - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - def test_cached_with_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = 
True - - mock_gdf = gpd.GeoDataFrame( - { - "name": ["A", "B"], - "area_km2": [200.0, 100.0], - }, - geometry=[ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]), - ], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_gminy() - assert result.iloc[0]["area_km2"] == 200.0 - - @patch("python_pkg.geo_data._poland_admin.gpd.read_file") - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - def test_cached_without_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["A"]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_gminy() - assert len(result) == 1 - - def test_downloads_from_osm(self) -> None: - with ( - patch("python_pkg.geo_data._poland_admin.sys.stdout"), - patch("python_pkg.geo_data._poland_admin.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_admin._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_admin._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_admin.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch("python_pkg.geo_data._common._add_area_column") as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "relation", - "tags": {"name": "Gmina A"}, - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ], - }, - # Duplicate name - should be skipped - { - "type": "relation", - "tags": {"name": "Gmina A"}, - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 2, 
"lat": 2}, - {"lon": 3, "lat": 2}, - {"lon": 3, "lat": 3}, - {"lon": 2, "lat": 3}, - ], - } - ], - }, - # Not a relation - should be skipped - {"type": "way", "tags": {"name": "Way"}}, - # No name - {"type": "relation", "tags": {}}, - # No outer rings - { - "type": "relation", - "tags": {"name": "Empty"}, - "members": [], - }, - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Gmina A"], "area_km2": [100.0]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - mock_add_area.return_value = mock_gdf - - result = get_polish_gminy() - assert len(result) == 1 - - -class TestGetPolandBoundary: - """Tests for get_poland_boundary.""" - - @patch("python_pkg.geo_data._poland_admin.gpd.read_file") - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - - result = get_poland_boundary() - assert result is mock_gdf - - @patch("python_pkg.geo_data._poland_admin.gpd.GeoDataFrame.to_file") - @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_admin.get_polish_wojewodztwa") - @patch("python_pkg.geo_data._poland_admin.CACHE_DIR") - def test_dissolves_from_wojewodztwa( - self, - mock_cache_dir: MagicMock, - mock_woj: MagicMock, - mock_ensure: MagicMock, - mock_to_file: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - woj_gdf = gpd.GeoDataFrame( - {"name": ["woj1", "woj2"]}, - geometry=[ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]), - ], - crs="EPSG:4326", - ) - mock_woj.return_value = woj_gdf - - result = 
get_poland_boundary() - assert len(result) == 1 diff --git a/python_pkg/geo_data/tests/test_poland_nature.py b/python_pkg/geo_data/tests/test_poland_nature.py deleted file mode 100644 index 1e252bc..0000000 --- a/python_pkg/geo_data/tests/test_poland_nature.py +++ /dev/null @@ -1,385 +0,0 @@ -"""Tests for python_pkg.geo_data._poland_nature module.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import geopandas as gpd -import pytest -from shapely.geometry import Polygon - -from python_pkg.geo_data._poland_nature import ( - get_polish_mountain_peaks, - get_polish_mountain_ranges, - get_polish_national_parks, -) - - -def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]: - """Create a mock OSM relation element.""" - members = [] - if include_outer: - members.append( - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ) - return {"type": "relation", "tags": {"name": name}, "members": members} - - -class TestGetPolishMountainPeaks: - """Tests for get_polish_mountain_peaks.""" - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Rysy", "Babia Góra"], "elevation": [2499.0, 1725.0]}, - geometry=[ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]), - ], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_mountain_peaks() - assert result.iloc[0]["elevation"] == 2499.0 - - @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features") - 
@patch("python_pkg.geo_data._poland_nature._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_nature._overpass_query") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - @patch("python_pkg.geo_data._poland_nature.sys.stdout") - def test_downloads_peaks( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "node", - "tags": {"name": "Rysy", "ele": "2499"}, - "lon": 20.0, - "lat": 49.0, - }, - # Below threshold - { - "type": "node", - "tags": {"name": "LowPeak", "ele": "100"}, - "lon": 20.0, - "lat": 49.0, - }, - # Missing ele - { - "type": "node", - "tags": {"name": "NoEle"}, - "lon": 20.0, - "lat": 49.0, - }, - # Duplicate name - { - "type": "node", - "tags": {"name": "Rysy", "ele": "2499"}, - "lon": 20.0, - "lat": 49.0, - }, - # Not a node - { - "type": "way", - "tags": {"name": "Way", "ele": "500"}, - }, - # No name - { - "type": "node", - "tags": {"ele": "500"}, - "lon": 20.0, - "lat": 49.0, - }, - # Comma in ele - { - "type": "node", - "tags": {"name": "Peak2", "ele": "500,5 m"}, - "lon": 20.0, - "lat": 49.0, - }, - # Invalid ele - { - "type": "node", - "tags": {"name": "BadEle", "ele": "abc"}, - "lon": 20.0, - "lat": 49.0, - }, - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Rysy", "Peak2"], "elevation": [2499.0, 500.5]}, - geometry=[ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]), - ], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - - result = get_polish_mountain_peaks() - assert result.iloc[0]["elevation"] == 2499.0 - - @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_nature._overpass_query") - 
@patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - @patch("python_pkg.geo_data._poland_nature.sys.stdout") - def test_no_peaks_raises( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - - with pytest.raises(ValueError, match="No mountain peaks found"): - get_polish_mountain_peaks() - - -class TestGetPolishMountainRanges: - """Tests for get_polish_mountain_ranges.""" - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_with_area( - self, - mock_cache_dir: MagicMock, - mock_read: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - mock_gdf = gpd.GeoDataFrame( - {"name": ["Tatry"], "area_km2": [100.0]}, - geometry=[poly], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_mountain_ranges() - assert "area_km2" in result.columns - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_without_area( - self, - mock_cache_dir: MagicMock, - mock_read: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - mock_gdf = gpd.GeoDataFrame( - {"name": ["Tatry"]}, - geometry=[poly], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_mountain_ranges() - assert len(result) >= 0 - - @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features") - 
@patch("python_pkg.geo_data._poland_nature._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_nature._overpass_query") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - @patch("python_pkg.geo_data._poland_nature.sys.stdout") - def test_downloads_ranges( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - # Relation - _make_relation_element("Tatry"), - # Way with enough coords - { - "type": "way", - "tags": {"name": "Bieszczady"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Way with auto-close - { - "type": "way", - "tags": {"name": "Karkonosze"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 0.5}, - ], - }, - # Way already closed (first == last) - { - "type": "way", - "tags": {"name": "Sudety"}, - "geometry": [ - {"lon": 2, "lat": 2}, - {"lon": 3, "lat": 2}, - {"lon": 3, "lat": 3}, - {"lon": 2, "lat": 2}, - ], - }, - # Way too few coords - { - "type": "way", - "tags": {"name": "Short"}, - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 0}], - }, - # Duplicate - _make_relation_element("Tatry"), - # No name - _make_relation_element(""), - # Unknown type - {"type": "node", "tags": {"name": "Ignored"}}, - # Way without geometry - {"type": "way", "tags": {"name": "NoGeom"}}, - # Relation without outer rings - _make_relation_element("NoOuter", include_outer=False), - ] - } - - poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - mock_gdf = gpd.GeoDataFrame( - {"name": ["Tatry", "Bieszczady", "Karkonosze", "Sudety"]}, - geometry=[poly, poly, poly, poly], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf 
- - result = get_polish_mountain_ranges() - assert len(result) >= 0 - - -class TestGetPolishNationalParks: - """Tests for get_polish_national_parks.""" - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_with_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Tatrzański Park Narodowy"], "area_km2": [200.0]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_national_parks() - assert result.iloc[0]["area_km2"] == 200.0 - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_without_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Tatrzański Park Narodowy"]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_national_parks() - assert len(result) == 1 - - @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_nature._overpass_query") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - @patch("python_pkg.geo_data._poland_nature.sys.stdout") - def test_downloads_parks( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = 
MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - _make_relation_element("Tatrzański Park Narodowy"), - # Not a national park (missing "Narodowy") - _make_relation_element("Some Reserve"), - # Not a relation - {"type": "way", "tags": {"name": "Park Narodowy X"}}, - # No name - {"type": "relation", "tags": {}, "members": []}, - # Duplicate - _make_relation_element("Tatrzański Park Narodowy"), - # No outer rings - _make_relation_element("Empty Park Narodowy", include_outer=False), - # Case insensitive match - _make_relation_element("park narodowy Biebrzy"), - ] - } - - poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - mock_gdf = gpd.GeoDataFrame( - {"name": ["Tatrzański Park Narodowy", "park narodowy Biebrzy"]}, - geometry=[poly, poly], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - - result = get_polish_national_parks() - assert len(result) >= 0 diff --git a/python_pkg/geo_data/tests/test_poland_nature_part2.py b/python_pkg/geo_data/tests/test_poland_nature_part2.py deleted file mode 100644 index ef62b4a..0000000 --- a/python_pkg/geo_data/tests/test_poland_nature_part2.py +++ /dev/null @@ -1,418 +0,0 @@ -"""Tests for forests, nature reserves, and landscape parks download paths.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import geopandas as gpd -from shapely.geometry import Polygon - -from python_pkg.geo_data._poland_nature import ( - get_polish_forests, - get_polish_landscape_parks, - get_polish_nature_reserves, -) - - -def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]: - """Create a mock OSM relation element.""" - members = [] - if include_outer: - members.append( - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ) - return {"type": "relation", "tags": {"name": 
name}, "members": members} - - -_POLY = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - - -class TestGetPolishForests: - """Tests for get_polish_forests.""" - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_with_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Puszcza Białowieska"], "area_km2": [600.0]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_forests() - assert result.iloc[0]["area_km2"] == 600.0 - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_without_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Puszcza Białowieska"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_forests() - assert len(result) == 1 - - def test_downloads_forests(self) -> None: - with ( - patch("python_pkg.geo_data._poland_nature.sys.stdout"), - patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_nature._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_nature._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = 
{ - "elements": [ - # Valid forest with keyword - { - "type": "way", - "tags": {"name": "Puszcza Białowieska"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Bory keyword - { - "type": "way", - "tags": {"name": "Bory Tucholskie"}, - "geometry": [ - {"lon": 2, "lat": 2}, - {"lon": 3, "lat": 2}, - {"lon": 3, "lat": 3}, - {"lon": 2, "lat": 3}, - ], - }, - # No forest keyword -> skip - { - "type": "way", - "tags": {"name": "Random Wood"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Duplicate - { - "type": "way", - "tags": {"name": "Puszcza Białowieska"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # No name - {"type": "way", "tags": {}, "geometry": []}, - # Geometry extraction fails (too few coords) - { - "type": "way", - "tags": {"name": "Las Mały"}, - "geometry": [{"lon": 0, "lat": 0}], - }, - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Puszcza Białowieska", "Bory Tucholskie"]}, - geometry=[_POLY, _POLY], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - gdf_with_area = mock_gdf.copy() - gdf_with_area["area_km2"] = [600.0, 300.0] - mock_add_area.return_value = gdf_with_area - - result = get_polish_forests() - assert len(result) == 2 - - def test_downloads_forests_empty(self) -> None: - with ( - patch("python_pkg.geo_data._poland_nature.sys.stdout"), - patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_nature._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_nature._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - 
mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - mock_add_area.return_value = empty_gdf - result = get_polish_forests() - assert len(result) == 0 - - -class TestGetPolishNatureReserves: - """Tests for get_polish_nature_reserves.""" - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_with_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Rezerwat X"], "area_km2": [50.0]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_nature_reserves() - assert result.iloc[0]["area_km2"] == 50.0 - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_without_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Rezerwat X"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_nature_reserves() - assert len(result) == 1 - - def test_downloads_reserves(self) -> None: - with ( - patch("python_pkg.geo_data._poland_nature.sys.stdout"), - patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_nature._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features" - ) 
as mock_from_features, - patch( - "python_pkg.geo_data._poland_nature._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "way", - "tags": {"name": "Rezerwat A"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Duplicate - { - "type": "way", - "tags": {"name": "Rezerwat A"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # No name - {"type": "way", "tags": {}, "geometry": []}, - # Geometry fails - { - "type": "way", - "tags": {"name": "Tiny"}, - "geometry": [{"lon": 0, "lat": 0}], - }, - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Rezerwat A"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - gdf_with_area = mock_gdf.copy() - gdf_with_area["area_km2"] = [50.0] - mock_add_area.return_value = gdf_with_area - - result = get_polish_nature_reserves() - assert len(result) == 1 - - def test_downloads_reserves_empty(self) -> None: - with ( - patch("python_pkg.geo_data._poland_nature.sys.stdout"), - patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_nature._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_nature._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - 
mock_add_area.return_value = empty_gdf - result = get_polish_nature_reserves() - assert len(result) == 0 - - -class TestGetPolishLandscapeParks: - """Tests for get_polish_landscape_parks.""" - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_with_area( - self, - mock_cache_dir: MagicMock, - mock_read: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Park Krajobrazowy X"], "area_km2": [100.0]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_landscape_parks() - assert result.iloc[0]["area_km2"] == 100.0 - - @patch("python_pkg.geo_data._poland_nature.gpd.read_file") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - def test_cached_without_area( - self, - mock_cache_dir: MagicMock, - mock_read: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Park Krajobrazowy X"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_landscape_parks() - assert len(result) == 1 - - @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_nature._overpass_query") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - @patch("python_pkg.geo_data._poland_nature.sys.stdout") - def test_downloads_landscape_parks( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - 
mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - _make_relation_element("Park Krajobrazowy A"), - # Not a relation -> skip - { - "type": "way", - "tags": {"name": "Park Krajobrazowy B"}, - "geometry": [], - }, - # No name - {"type": "relation", "tags": {}, "members": []}, - # Duplicate - _make_relation_element("Park Krajobrazowy A"), - # No outer rings - _make_relation_element("Park Empty", include_outer=False), - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Park Krajobrazowy A"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - - result = get_polish_landscape_parks() - assert len(result) == 1 - - @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_nature._overpass_query") - @patch("python_pkg.geo_data._poland_nature.CACHE_DIR") - @patch("python_pkg.geo_data._poland_nature.sys.stdout") - def test_downloads_landscape_parks_empty( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - result = get_polish_landscape_parks() - assert len(result) == 0 diff --git a/python_pkg/geo_data/tests/test_poland_water.py b/python_pkg/geo_data/tests/test_poland_water.py deleted file mode 100644 index a0516dc..0000000 --- a/python_pkg/geo_data/tests/test_poland_water.py +++ /dev/null @@ -1,466 +0,0 @@ -"""Tests for python_pkg.geo_data._poland_water module.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import geopandas as 
gpd -from shapely.geometry import Polygon - -from python_pkg.geo_data._poland_water import ( - _extract_coastal_geometry, - _extract_river_coords_from_element, - get_polish_lakes, - get_polish_rivers, -) - - -class TestExtractCoastalGeometry: - """Tests for _extract_coastal_geometry.""" - - def test_relation_delegated(self) -> None: - element: dict[str, Any] = { - "type": "relation", - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ], - } - result = _extract_coastal_geometry(element, "peninsula", ("cliff", "beach")) - assert result is not None - - def test_way_line_type(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - } - result = _extract_coastal_geometry(element, "cliff", ("cliff", "beach")) - assert result is not None - assert result["type"] == "LineString" - - def test_way_polygon_type(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - result = _extract_coastal_geometry(element, "peninsula", ("cliff", "beach")) - assert result is not None - assert result["type"] == "Polygon" - - def test_way_polygon_auto_close(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 0.5}, - ], - } - result = _extract_coastal_geometry(element, "peninsula", ("cliff", "beach")) - assert result is not None - assert result["coordinates"][0][0] == result["coordinates"][0][-1] - - def test_way_polygon_already_closed(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 0}, - ], - } - result = _extract_coastal_geometry(element, "peninsula", 
("cliff", "beach")) - assert result is not None - assert result["type"] == "Polygon" - assert len(result["coordinates"][0]) == 4 - - def test_way_too_short_for_polygon_not_line(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - ], - } - # 3 coords, >= MIN_LINE_COORDS but < MIN_RING_COORDS for polygon - result = _extract_coastal_geometry(element, "peninsula", ("cliff", "beach")) - # 3 coords is not enough for ring (need 4), so returns None - assert result is None - - def test_way_too_few_coords(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}], - } - result = _extract_coastal_geometry(element, "cliff", ("cliff", "beach")) - assert result is None - - def test_not_way_or_relation(self) -> None: - element: dict[str, Any] = {"type": "node"} - result = _extract_coastal_geometry(element, "cliff", ("cliff", "beach")) - assert result is None - - def test_way_no_geometry(self) -> None: - element: dict[str, Any] = {"type": "way"} - result = _extract_coastal_geometry(element, "cliff", ("cliff", "beach")) - assert result is None - - -class TestExtractRiverCoordsFromElement: - """Tests for _extract_river_coords_from_element.""" - - def test_way_element(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - } - result = _extract_river_coords_from_element(element) - assert len(result) == 1 - - def test_way_too_few_coords(self) -> None: - element: dict[str, Any] = { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}], - } - result = _extract_river_coords_from_element(element) - assert len(result) == 0 - - def test_relation_element(self) -> None: - element: dict[str, Any] = { - "type": "relation", - "members": [ - { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - }, - { - "type": "way", - "geometry": [{"lon": 1, "lat": 1}, {"lon": 2, 
"lat": 2}], - }, - # Too few coords - { - "type": "way", - "geometry": [{"lon": 0, "lat": 0}], - }, - # Not a way - { - "type": "node", - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - }, - # No geometry - {"type": "way"}, - ], - } - result = _extract_river_coords_from_element(element) - assert len(result) == 2 - - def test_unknown_type(self) -> None: - element: dict[str, Any] = {"type": "node"} - result = _extract_river_coords_from_element(element) - assert len(result) == 0 - - def test_way_no_geometry(self) -> None: - element: dict[str, Any] = {"type": "way"} - result = _extract_river_coords_from_element(element) - assert len(result) == 0 - - -class TestGetPolishLakes: - """Tests for get_polish_lakes.""" - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_with_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Śniardwy"], "area_km2": [113.0]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_lakes() - assert result.iloc[0]["area_km2"] == 113.0 - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_without_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Śniardwy"]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_lakes() - assert len(result) == 1 - - def test_downloads_lakes(self) -> None: - with ( - 
patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "way", - "tags": {"name": "Śniardwy"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Duplicate - { - "type": "way", - "tags": {"name": "Śniardwy"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # No name - {"type": "way", "tags": {}, "geometry": []}, - # Geometry extraction fails - { - "type": "way", - "tags": {"name": "Tiny"}, - "geometry": [{"lon": 0, "lat": 0}], - }, - ] - } - - poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - mock_gdf = gpd.GeoDataFrame( - {"name": ["Śniardwy"]}, - geometry=[poly], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - gdf_with_area = mock_gdf.copy() - gdf_with_area["area_km2"] = [113.0] - mock_add_area.return_value = gdf_with_area - - result = get_polish_lakes() - assert len(result) >= 0 - - def test_empty_result(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as 
mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - mock_add_area.return_value = empty_gdf - - result = get_polish_lakes() - assert len(result) == 0 - - -class TestGetPolishRivers: - """Tests for get_polish_rivers.""" - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_with_length( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Wisła"], "length_km": [1047.0]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_rivers() - assert result.iloc[0]["length_km"] == 1047.0 - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_without_length( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Wisła"]}, - geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_polish_rivers() - assert len(result) == 1 - - def test_downloads_rivers(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - 
patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_length_column" - ) as mock_add_length, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - # Way with wikidata - { - "type": "way", - "id": 1, - "tags": {"name": "Wisła", "wikidata": "Q54"}, - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - }, - # Way without wikidata - { - "type": "way", - "id": 2, - "tags": {"name": "Odra"}, - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - }, - # Relation - { - "type": "relation", - "id": 3, - "tags": {"name": "Bug", "wikidata": "Q55"}, - "members": [ - { - "type": "way", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 1}, - ], - }, - { - "type": "way", - "geometry": [ - {"lon": 1, "lat": 1}, - {"lon": 2, "lat": 2}, - ], - }, - ], - }, - # No name - { - "type": "way", - "id": 4, - "tags": {}, - "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}], - }, - # Way with no coords - { - "type": "way", - "id": 5, - "tags": {"name": "Short"}, - "geometry": [{"lon": 0, "lat": 0}], - }, - ] - } - - poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - mock_gdf = gpd.GeoDataFrame( - {"name": ["Wisła", "Odra", "Bug"]}, - geometry=[poly, poly, poly], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - gdf_with_length = mock_gdf.copy() - gdf_with_length["length_km"] = [1047.0, 854.0, 772.0] - mock_add_length.return_value = gdf_with_length - - result = get_polish_rivers() - assert len(result) >= 0 - - def test_empty_result(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as 
mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_length_column" - ) as mock_add_length, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - mock_add_length.return_value = empty_gdf - - result = get_polish_rivers() - assert len(result) == 0 diff --git a/python_pkg/geo_data/tests/test_poland_water_part2.py b/python_pkg/geo_data/tests/test_poland_water_part2.py deleted file mode 100644 index 92fa4c4..0000000 --- a/python_pkg/geo_data/tests/test_poland_water_part2.py +++ /dev/null @@ -1,397 +0,0 @@ -"""Tests for islands, coastal features, and UNESCO sites download paths.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import geopandas as gpd -from shapely.geometry import Polygon - -from python_pkg.geo_data._poland_water import ( - get_polish_coastal_features, - get_polish_islands, - get_polish_unesco_sites, -) - - -def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]: - """Create a mock OSM relation element.""" - members = [] - if include_outer: - members.append( - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ) - return {"type": "relation", "tags": {"name": name}, "members": members} - - -_POLY = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)]) - - -class TestGetPolishIslands: - """Tests for get_polish_islands.""" - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - 
@patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_with_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Wolin"], "area_km2": [265.0]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_islands() - assert result.iloc[0]["area_km2"] == 265.0 - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_without_area( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Wolin"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_islands() - assert len(result) == 1 - - def test_downloads_islands(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "way", - "tags": {"name": "Wolin"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Duplicate - { - "type": "way", - "tags": {"name": "Wolin"}, - 
"geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # No name - {"type": "way", "tags": {}, "geometry": []}, - # Geometry fails - { - "type": "way", - "tags": {"name": "Tiny"}, - "geometry": [{"lon": 0, "lat": 0}], - }, - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Wolin"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - gdf_with_area = mock_gdf.copy() - gdf_with_area["area_km2"] = [265.0] - mock_add_area.return_value = gdf_with_area - - result = get_polish_islands() - assert len(result) == 1 - - def test_downloads_islands_empty(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_area_column" - ) as mock_add_area, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - mock_add_area.return_value = empty_gdf - result = get_polish_islands() - assert len(result) == 0 - - -class TestGetPolishCoastalFeatures: - """Tests for get_polish_coastal_features.""" - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_with_length( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - 
{"name": ["Mierzeja Helska"], "length_km": [35.0]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_coastal_features() - assert result.iloc[0]["length_km"] == 35.0 - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached_without_length( - self, mock_cache_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = gpd.GeoDataFrame( - {"name": ["Mierzeja Helska"]}, - geometry=[_POLY], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - result = get_polish_coastal_features() - assert len(result) == 1 - - def test_downloads_coastal_features(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_length_column" - ) as mock_add_length, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - # Peninsula (polygon type) - { - "type": "way", - "tags": {"name": "Hel", "natural": "peninsula"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # Cliff (line type) - { - "type": "way", - "tags": {"name": "Klif Orłowski", "natural": "cliff"}, - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 1}, - ], - }, - # Duplicate - { - "type": "way", - "tags": {"name": "Hel", "natural": "peninsula"}, - "geometry": [ - 
{"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - }, - # No name - { - "type": "way", - "tags": {"natural": "cliff"}, - "geometry": [], - }, - # Geometry fails (no geometry key) - { - "type": "node", - "tags": {"name": "X", "natural": "cliff"}, - }, - ] - } - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Hel", "Klif Orłowski"]}, - geometry=[_POLY, _POLY], - crs="EPSG:4326", - ) - mock_from_features.return_value = mock_gdf - gdf_with_length = mock_gdf.copy() - gdf_with_length["length_km"] = [35.0, 5.0] - mock_add_length.return_value = gdf_with_length - - result = get_polish_coastal_features() - assert len(result) == 2 - - def test_downloads_coastal_features_empty(self) -> None: - with ( - patch("python_pkg.geo_data._poland_water.sys.stdout"), - patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query, - patch("python_pkg.geo_data._poland_water._ensure_cache_dir"), - patch( - "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._poland_water._add_length_column" - ) as mock_add_length, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - mock_query.return_value = {"elements": []} - empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []}) - mock_from_features.return_value = empty_gdf - mock_add_length.return_value = empty_gdf - result = get_polish_coastal_features() - assert len(result) == 0 - - -class TestGetPolishUnescoSites: - """Tests for get_polish_unesco_sites.""" - - @patch("python_pkg.geo_data._poland_water.gpd.read_file") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - 
mock_path.exists.return_value = True - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - result = get_polish_unesco_sites() - assert result is mock_gdf - - @patch("python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._poland_water._ensure_cache_dir") - @patch("python_pkg.geo_data._poland_water._overpass_query") - @patch("python_pkg.geo_data._poland_water.CACHE_DIR") - @patch("python_pkg.geo_data._poland_water.sys.stdout") - def test_downloads_unesco_sites( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - # Node type - { - "type": "node", - "tags": {"name": "Kopalnia Soli Wieliczka"}, - "lon": 20.0, - "lat": 50.0, - }, - # Relation type - _make_relation_element("Stare Miasto w Krakowie"), - # Way type with enough coords - { - "type": "way", - "tags": {"name": "Auschwitz"}, - "geometry": [ - {"lon": 19, "lat": 50}, - {"lon": 19.1, "lat": 50}, - {"lon": 19.1, "lat": 50.1}, - {"lon": 19, "lat": 50.1}, - ], - }, - # Way already closed - { - "type": "way", - "tags": {"name": "Zamość"}, - "geometry": [ - {"lon": 23, "lat": 50.7}, - {"lon": 23.1, "lat": 50.7}, - {"lon": 23.1, "lat": 50.8}, - {"lon": 23, "lat": 50.7}, - ], - }, - # Way too few coords - { - "type": "way", - "tags": {"name": "TooShort"}, - "geometry": [ - {"lon": 19, "lat": 50}, - {"lon": 19.1, "lat": 50}, - ], - }, - # Duplicate - { - "type": "node", - "tags": {"name": "Kopalnia Soli Wieliczka"}, - "lon": 20.0, - "lat": 50.0, - }, - # No name - {"type": "node", "tags": {}, "lon": 0, "lat": 0}, - # Unknown type - {"type": "area", "tags": {"name": "Ignored"}}, - # Relation without outer rings - _make_relation_element("NoOuter", include_outer=False), 
- # Way without geometry key - {"type": "way", "tags": {"name": "NoGeom"}}, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = get_polish_unesco_sites() - assert result is mock_gdf diff --git a/python_pkg/geo_data/tests/test_warsaw.py b/python_pkg/geo_data/tests/test_warsaw.py deleted file mode 100644 index 2a58cb3..0000000 --- a/python_pkg/geo_data/tests/test_warsaw.py +++ /dev/null @@ -1,424 +0,0 @@ -"""Tests for python_pkg.geo_data._warsaw module.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import geopandas as gpd -from shapely.geometry import LineString, Polygon - -from python_pkg.geo_data._warsaw import ( - _merge_bridge_segments, - get_vistula_river, - get_warsaw_boundary, - get_warsaw_bridges, - get_warsaw_districts, -) - - -class TestGetWarsawBoundary: - """Tests for get_warsaw_boundary.""" - - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - - result = get_warsaw_boundary() - assert result is mock_gdf - - @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.to_file") - @patch("python_pkg.geo_data._warsaw._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw._PKG_DIR") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_from_districts_file_with_warszawa( - self, - mock_cache_dir: MagicMock, - mock_pkg_dir: MagicMock, - mock_read: MagicMock, - mock_ensure: MagicMock, - mock_to_file: MagicMock, - ) -> None: - mock_cache_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_cache_path) - 
mock_cache_path.exists.return_value = False - - mock_districts_path = MagicMock() - mock_pkg_dir.__truediv__ = MagicMock(return_value=MagicMock()) - mock_pkg_dir.__truediv__.return_value.__truediv__ = MagicMock( - return_value=MagicMock() - ) - mock_pkg_dir.__truediv__.return_value.__truediv__.return_value.__truediv__ = ( - MagicMock(return_value=mock_districts_path) - ) - mock_districts_path.exists.return_value = True - - mock_warsaw_gdf = gpd.GeoDataFrame( - {"name": ["Warszawa", "Mokotów"]}, - geometry=[ - Polygon([(20, 52), (21, 52), (21, 53), (20, 53)]), - Polygon([(20.5, 52.5), (20.6, 52.5), (20.6, 52.6), (20.5, 52.6)]), - ], - crs="EPSG:4326", - ) - mock_read.return_value = mock_warsaw_gdf - - result = get_warsaw_boundary() - assert len(result) == 1 - - @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.to_file") - @patch("python_pkg.geo_data._warsaw._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw._PKG_DIR") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_from_districts_file_no_warszawa_entry( - self, - mock_cache_dir: MagicMock, - mock_pkg_dir: MagicMock, - mock_read: MagicMock, - mock_ensure: MagicMock, - mock_to_file: MagicMock, - ) -> None: - mock_cache_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_cache_path) - mock_cache_path.exists.return_value = False - - mock_districts_path = MagicMock() - mock_pkg_dir.__truediv__ = MagicMock(return_value=MagicMock()) - mock_pkg_dir.__truediv__.return_value.__truediv__ = MagicMock( - return_value=MagicMock() - ) - mock_pkg_dir.__truediv__.return_value.__truediv__.return_value.__truediv__ = ( - MagicMock(return_value=mock_districts_path) - ) - mock_districts_path.exists.return_value = True - - # No "Warszawa" entry - mock_warsaw_gdf = gpd.GeoDataFrame( - {"name": ["Mokotów", "Śródmieście"]}, - geometry=[ - Polygon([(20, 52), (21, 52), (21, 53), (20, 53)]), - Polygon([(20.5, 52.5), (20.6, 52.5), (20.6, 52.6), 
(20.5, 52.6)]), - ], - crs="EPSG:4326", - ) - mock_read.return_value = mock_warsaw_gdf - - result = get_warsaw_boundary() - assert len(result) == 1 - - def test_fallback_overpass(self) -> None: - with ( - patch("python_pkg.geo_data._warsaw.sys.stdout"), - patch("python_pkg.geo_data._warsaw.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._warsaw._PKG_DIR") as mock_pkg_dir, - patch("python_pkg.geo_data._warsaw._overpass_query") as mock_query, - patch("python_pkg.geo_data._warsaw._ensure_cache_dir"), - patch( - "python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - ): - mock_cache_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_cache_path) - mock_cache_path.exists.return_value = False - - mock_districts_path = MagicMock() - mock_pkg_dir.__truediv__ = MagicMock(return_value=MagicMock()) - mock_pkg_dir.__truediv__.return_value.__truediv__ = MagicMock( - return_value=MagicMock() - ) - nested = mock_pkg_dir.__truediv__.return_value.__truediv__ - nested.return_value.__truediv__ = MagicMock( - return_value=mock_districts_path - ) - mock_districts_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "relation", - "members": [ - { - "role": "outer", - "geometry": [ - {"lon": 20, "lat": 52}, - {"lon": 21, "lat": 52}, - {"lon": 21, "lat": 53}, - ], - }, - # non-outer member - { - "role": "inner", - "geometry": [ - {"lon": 20.5, "lat": 52.5}, - ], - }, - ], - }, - # Not a relation - {"type": "way"}, - # Relation with no outer geometry (empty coords) - { - "type": "relation", - "members": [ - {"role": "inner", "geometry": [{"lon": 20, "lat": 52}]}, - ], - }, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = get_warsaw_boundary() - assert result is mock_gdf - - -class TestGetWarsawDistricts: - """Tests for get_warsaw_districts.""" - - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - 
@patch("python_pkg.geo_data._warsaw._PKG_DIR") - def test_districts_file_exists( - self, mock_pkg_dir: MagicMock, mock_read: MagicMock - ) -> None: - mock_districts_path = MagicMock() - mock_pkg_dir.__truediv__ = MagicMock(return_value=MagicMock()) - mock_pkg_dir.__truediv__.return_value.__truediv__ = MagicMock( - return_value=MagicMock() - ) - mock_pkg_dir.__truediv__.return_value.__truediv__.return_value.__truediv__ = ( - MagicMock(return_value=mock_districts_path) - ) - mock_districts_path.exists.return_value = True - - mock_gdf = gpd.GeoDataFrame( - {"name": ["Warszawa", "Mokotów", "Śródmieście"]}, - geometry=[ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), - ], - crs="EPSG:4326", - ) - mock_read.return_value = mock_gdf - - result = get_warsaw_districts() - assert "Warszawa" not in result["name"].values - - @patch("python_pkg.geo_data._warsaw._PKG_DIR") - def test_districts_file_not_found(self, mock_pkg_dir: MagicMock) -> None: - mock_districts_path = MagicMock() - mock_pkg_dir.__truediv__ = MagicMock(return_value=MagicMock()) - mock_pkg_dir.__truediv__.return_value.__truediv__ = MagicMock( - return_value=MagicMock() - ) - mock_pkg_dir.__truediv__.return_value.__truediv__.return_value.__truediv__ = ( - MagicMock(return_value=mock_districts_path) - ) - mock_districts_path.exists.return_value = False - - import pytest - - with pytest.raises(FileNotFoundError, match="Warsaw districts GeoJSON"): - get_warsaw_districts() - - -class TestGetVistulaRiver: - """Tests for get_vistula_river.""" - - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = 
mock_gdf - - result = get_vistula_river() - assert result is mock_gdf - - @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._warsaw._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw._overpass_query") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - @patch("python_pkg.geo_data._warsaw.sys.stdout") - def test_downloads( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "way", - "geometry": [ - {"lon": 20.0, "lat": 52.0}, - {"lon": 21.0, "lat": 52.5}, - ], - }, - # Too few coords - { - "type": "way", - "geometry": [{"lon": 20.0, "lat": 52.0}], - }, - # Not a way - {"type": "node"}, - # Way without geometry - {"type": "way"}, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = get_vistula_river() - assert result is mock_gdf - - -class TestGetWarsawBridges: - """Tests for get_warsaw_bridges.""" - - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - - result = get_warsaw_bridges() - assert result is mock_gdf - - def test_downloads(self) -> None: - with ( - patch("python_pkg.geo_data._warsaw.sys.stdout"), - patch("python_pkg.geo_data._warsaw.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._warsaw.get_vistula_river") as mock_vistula, - patch("python_pkg.geo_data._warsaw._overpass_query") as mock_query, 
- patch("python_pkg.geo_data._warsaw._ensure_cache_dir"), - patch( - "python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - # Create a real Vistula geometry for intersection tests - vistula_gdf = gpd.GeoDataFrame( - {"name": ["Wisła"]}, - geometry=[LineString([(20.0, 52.2), (21.0, 52.2)])], - crs="EPSG:4326", - ) - mock_vistula.return_value = vistula_gdf - - mock_query.return_value = { - "elements": [ - # Bridge that intersects vistula buffer - { - "type": "way", - "id": 1, - "tags": {"name": "Most Łazienkowski"}, - "geometry": [ - {"lon": 20.5, "lat": 52.19}, - {"lon": 20.5, "lat": 52.21}, - ], - }, - # Bridge far from vistula - { - "type": "way", - "id": 2, - "tags": {"name": "Most Daleki"}, - "geometry": [ - {"lon": 20.5, "lat": 55.0}, - {"lon": 20.5, "lat": 55.1}, - ], - }, - # Not a way - {"type": "node", "tags": {"name": "Most X"}}, - # Way without geometry - {"type": "way", "tags": {"name": "Most Y"}}, - # No name - { - "type": "way", - "id": 3, - "tags": {}, - "geometry": [ - {"lon": 20.5, "lat": 52.19}, - {"lon": 20.5, "lat": 52.21}, - ], - }, - # Duplicate - { - "type": "way", - "id": 4, - "tags": {"name": "Most Łazienkowski"}, - "geometry": [ - {"lon": 20.5, "lat": 52.19}, - {"lon": 20.5, "lat": 52.21}, - ], - }, - # Too few coords - { - "type": "way", - "id": 5, - "tags": {"name": "Most Short"}, - "geometry": [{"lon": 20.5, "lat": 52.19}], - }, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = get_warsaw_bridges() - assert result is mock_gdf - - -class TestMergeBridgeSegments: - """Tests for _merge_bridge_segments.""" - - def test_single_segment(self) -> None: - features: list[dict[str, Any]] = [ - { - "properties": {"name": "Most A"}, - "geometry": {"coordinates": [(20, 52), (21, 52)]}, - } - ] - result = 
_merge_bridge_segments(features) - assert len(result) == 1 - assert result[0]["geometry"]["type"] == "LineString" - - def test_multiple_segments_same_name(self) -> None: - features: list[dict[str, Any]] = [ - { - "properties": {"name": "Most A"}, - "geometry": {"coordinates": [(20, 52), (21, 52)]}, - }, - { - "properties": {"name": "Most A"}, - "geometry": {"coordinates": [(21, 52), (22, 52)]}, - }, - ] - result = _merge_bridge_segments(features) - assert len(result) == 1 - assert result[0]["geometry"]["type"] == "MultiLineString" diff --git a/python_pkg/geo_data/tests/test_warsaw_part2.py b/python_pkg/geo_data/tests/test_warsaw_part2.py deleted file mode 100644 index bc649d3..0000000 --- a/python_pkg/geo_data/tests/test_warsaw_part2.py +++ /dev/null @@ -1,176 +0,0 @@ -"""Tests for metro stations and osiedla download paths.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import geopandas as gpd - -from python_pkg.geo_data._warsaw import ( - get_warsaw_metro_stations, - get_warsaw_osiedla, -) - - -def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]: - """Create a mock OSM relation element.""" - members = [] - if include_outer: - members.append( - { - "role": "outer", - "geometry": [ - {"lon": 0, "lat": 0}, - {"lon": 1, "lat": 0}, - {"lon": 1, "lat": 1}, - {"lon": 0, "lat": 1}, - ], - } - ) - return {"type": "relation", "tags": {"name": name}, "members": members} - - -class TestGetWarsawMetroStations: - """Tests for get_warsaw_metro_stations.""" - - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - result = 
get_warsaw_metro_stations() - assert result is mock_gdf - - @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._warsaw._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw._overpass_query") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - @patch("python_pkg.geo_data._warsaw.sys.stdout") - def test_downloads_metro( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - # M1 only station - { - "type": "node", - "tags": {"name": "Kabaty"}, - "lon": 21.0, - "lat": 52.1, - }, - # M2 only station - { - "type": "node", - "tags": {"name": "Bródno"}, - "lon": 21.0, - "lat": 52.3, - }, - # M1/M2 interchange - { - "type": "node", - "tags": {"name": "Świętokrzyska"}, - "lon": 21.0, - "lat": 52.2, - }, - # Unknown station - { - "type": "node", - "tags": {"name": "Nowa Stacja"}, - "lon": 21.0, - "lat": 52.4, - }, - # Not a node -> skip - { - "type": "way", - "tags": {"name": "Metro Line"}, - }, - # Node without name -> skip - { - "type": "node", - "tags": {}, - "lon": 21.0, - "lat": 52.0, - }, - # Duplicate - { - "type": "node", - "tags": {"name": "Kabaty"}, - "lon": 21.0, - "lat": 52.1, - }, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = get_warsaw_metro_stations() - assert result is mock_gdf - - -class TestGetWarsawOsiedla: - """Tests for get_warsaw_osiedla.""" - - @patch("python_pkg.geo_data._warsaw.gpd.read_file") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - 
mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - result = get_warsaw_osiedla() - assert result is mock_gdf - - @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._warsaw._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw._overpass_query") - @patch("python_pkg.geo_data._warsaw.CACHE_DIR") - @patch("python_pkg.geo_data._warsaw.sys.stdout") - def test_downloads_osiedla( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - _make_relation_element("Mokotów"), - # Not a relation -> skip - { - "type": "way", - "tags": {"name": "Way Osiedle"}, - }, - # No name - {"type": "relation", "tags": {}, "members": []}, - # Duplicate - _make_relation_element("Mokotów"), - # No outer rings - _make_relation_element("Empty", include_outer=False), - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = get_warsaw_osiedla() - assert result is mock_gdf diff --git a/python_pkg/geo_data/tests/test_warsaw_places.py b/python_pkg/geo_data/tests/test_warsaw_places.py deleted file mode 100644 index 40c526c..0000000 --- a/python_pkg/geo_data/tests/test_warsaw_places.py +++ /dev/null @@ -1,269 +0,0 @@ -"""Tests for python_pkg.geo_data._warsaw_places module.""" - -from __future__ import annotations - -from unittest.mock import MagicMock, patch - -import geopandas as gpd -from shapely.geometry import LineString - -from python_pkg.geo_data._warsaw_places import ( - _filter_streets_by_length, - get_warsaw_landmarks, - get_warsaw_streets, -) - - -class TestGetWarsawStreets: - """Tests for get_warsaw_streets.""" - - 
@patch("python_pkg.geo_data._warsaw_places._filter_streets_by_length") - @patch("python_pkg.geo_data._warsaw_places.gpd.read_file") - @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") - def test_cached( - self, - mock_cache_dir: MagicMock, - mock_read: MagicMock, - mock_filter: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = True - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - mock_filter.return_value = mock_gdf - - result = get_warsaw_streets() - assert result is mock_gdf - - def test_downloads(self) -> None: - with ( - patch("python_pkg.geo_data._warsaw_places.sys.stdout"), - patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") as mock_cache_dir, - patch("python_pkg.geo_data._warsaw_places._overpass_query") as mock_query, - patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir"), - patch( - "python_pkg.geo_data._warsaw_places.gpd.GeoDataFrame.from_features" - ) as mock_from_features, - patch( - "python_pkg.geo_data._warsaw_places._filter_streets_by_length" - ) as mock_filter, - ): - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - { - "type": "way", - "tags": {"name": "Marszałkowska", "highway": "primary"}, - "geometry": [ - {"lon": 21.0, "lat": 52.2}, - {"lon": 21.0, "lat": 52.3}, - ], - }, - # Too few coords - { - "type": "way", - "tags": {"name": "Short"}, - "geometry": [{"lon": 21.0, "lat": 52.2}], - }, - # Not a way - {"type": "node", "tags": {"name": "Node"}}, - # Way without geometry - {"type": "way", "tags": {"name": "NoGeom"}}, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - mock_filter.return_value = mock_gdf - - result = get_warsaw_streets() - assert result is mock_gdf - - -class TestFilterStreetsByLength: - """Tests for 
_filter_streets_by_length.""" - - def test_filters_and_merges(self) -> None: - gdf = gpd.GeoDataFrame( - { - "name": ["Marszałkowska", "Marszałkowska", "Unknown", "Short"], - "geometry": [ - LineString([(21.0, 52.2), (21.0, 52.3)]), - LineString([(21.0, 52.3), (21.0, 52.4)]), - LineString([(21.0, 52.2), (21.0, 52.3)]), - LineString([(21.0, 52.2), (21.001, 52.2001)]), - ], - }, - crs="EPSG:4326", - ) - result = _filter_streets_by_length(gdf, 500) - # Only streets >= 500m should be included - for _, row in result.iterrows(): - assert row["length_m"] >= 500 - - def test_single_segment(self) -> None: - gdf = gpd.GeoDataFrame( - { - "name": ["Marszałkowska"], - "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])], - }, - crs="EPSG:4326", - ) - result = _filter_streets_by_length(gdf, 0) - # Single segment should remain a LineString - assert len(result) == 1 - - def test_unknown_name_excluded(self) -> None: - gdf = gpd.GeoDataFrame( - { - "name": ["Unknown"], - "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])], - }, - crs="EPSG:4326", - ) - result = _filter_streets_by_length(gdf, 0) - assert len(result) == 0 - - def test_empty_name_excluded(self) -> None: - gdf = gpd.GeoDataFrame( - { - "name": [""], - "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])], - }, - crs="EPSG:4326", - ) - result = _filter_streets_by_length(gdf, 0) - assert len(result) == 0 - - def test_no_name_column(self) -> None: - gdf = gpd.GeoDataFrame( - { - "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])], - }, - crs="EPSG:4326", - ) - result = _filter_streets_by_length(gdf, 0) - assert len(result) == 0 - - -class TestGetWarsawLandmarks: - """Tests for get_warsaw_landmarks.""" - - @patch("python_pkg.geo_data._warsaw_places.gpd.read_file") - @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") - def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - 
mock_path.exists.return_value = True - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_read.return_value = mock_gdf - - result = get_warsaw_landmarks() - assert result is mock_gdf - - @patch("python_pkg.geo_data._warsaw_places.gpd.GeoDataFrame.from_features") - @patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw_places._overpass_query") - @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") - @patch("python_pkg.geo_data._warsaw_places.sys.stdout") - def test_downloads( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - mock_from_features: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = { - "elements": [ - # Node with tourism - { - "type": "node", - "tags": {"name": "Muzeum Chopina", "tourism": "museum"}, - "lon": 21.0, - "lat": 52.2, - }, - # Way with center - { - "type": "way", - "tags": {"name": "Łazienki", "tourism": "attraction"}, - "center": {"lon": 21.0, "lat": 52.2}, - }, - # Node with historic - { - "type": "node", - "tags": {"name": "Kolumna Zygmunta", "historic": "monument"}, - "lon": 21.0, - "lat": 52.2, - }, - # Node with leisure - { - "type": "node", - "tags": {"name": "Park Skaryszewski", "leisure": "park"}, - "lon": 21.0, - "lat": 52.2, - }, - # Node no tourism/historic/leisure -> "landmark" - { - "type": "node", - "tags": {"name": "Generic"}, - "lon": 21.0, - "lat": 52.2, - }, - # Duplicate - { - "type": "node", - "tags": {"name": "Muzeum Chopina", "tourism": "museum"}, - "lon": 21.0, - "lat": 52.2, - }, - # No name - { - "type": "node", - "tags": {"tourism": "museum"}, - "lon": 21.0, - "lat": 52.2, - }, - # Way without center - { - "type": "way", - "tags": {"name": "No Center"}, - }, - ] - } - - mock_gdf = MagicMock(spec=gpd.GeoDataFrame) - mock_from_features.return_value = mock_gdf - - result = 
get_warsaw_landmarks() - assert result is mock_gdf - - @patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir") - @patch("python_pkg.geo_data._warsaw_places._overpass_query") - @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") - @patch("python_pkg.geo_data._warsaw_places.sys.stdout") - def test_empty_result( - self, - mock_stdout: MagicMock, - mock_cache_dir: MagicMock, - mock_query: MagicMock, - mock_ensure: MagicMock, - ) -> None: - mock_path = MagicMock() - mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path) - mock_path.exists.return_value = False - - mock_query.return_value = {"elements": []} - - result = get_warsaw_landmarks() - assert len(result) == 0 diff --git a/python_pkg/pdfCentered/.gitignore b/python_pkg/pdfCentered/.gitignore deleted file mode 100644 index e2b048f..0000000 --- a/python_pkg/pdfCentered/.gitignore +++ /dev/null @@ -1,163 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ -*.pdf diff --git a/python_pkg/pdfCentered/.python-version b/python_pkg/pdfCentered/.python-version deleted file mode 100644 index a5c4c76..0000000 --- a/python_pkg/pdfCentered/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.9.0 diff --git a/python_pkg/pdfCentered/pytest.ini b/python_pkg/pdfCentered/pytest.ini deleted file mode 100644 index f745106..0000000 --- a/python_pkg/pdfCentered/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ - -[pytest] -filterwarnings = - ignore::DeprecationWarning diff --git a/python_pkg/split/__init__.py b/python_pkg/split/__init__.py deleted file mode 100644 index 48ce478..0000000 --- a/python_pkg/split/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Symmetric splitting utilities package.""" diff --git a/python_pkg/split/run.sh b/python_pkg/split/run.sh deleted file mode 100755 index f2639b1..0000000 --- a/python_pkg/split/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash -set -e -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -"$SCRIPT_DIR/../../.venv/bin/python" "$SCRIPT_DIR/split_x_into_n_symmetrically.py" "$@" diff --git 
a/python_pkg/split/split_x_into_n_symmetrically.py b/python_pkg/split/split_x_into_n_symmetrically.py deleted file mode 100644 index 3e546bb..0000000 --- a/python_pkg/split/split_x_into_n_symmetrically.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Distribute values symmetrically across N parts.""" - - -def calculate_symmetric_weights( - n: int, middle_weight: float, factors: list[float] | None = None -) -> list[float]: - """Calculate symmetric weights for both even and odd N. - - Args: - n: Number of parts to split into. - middle_weight: The middle value for symmetry. - factors: If provided, controls the difference in weights. - Must have length n // 2 or n // 2 - 1 depending on n. - - Returns: - List of symmetric weights. - """ - half_n = n // 2 - weights_left: list[float] = [middle_weight] - - if factors: - for factor in factors: - next_weight = weights_left[-1] + factor - weights_left.append(next_weight) - else: - weights_left.extend(middle_weight - (idx + 1) for idx in range(half_n - 1)) - - if not n % 2: - weights = weights_left[::-1] + weights_left - else: - weights = [*weights_left[::-1], middle_weight, *weights_left] - - return weights - - -def scale_to_total(x: float, weights: list[float]) -> list[float]: - """Scale the weights so that their sum is proportional to X. - - Args: - x: Total value to distribute. - weights: The list of weights to be scaled. - - Returns: - List of scaled values summing to x. 
- """ - total_weight = sum(weights) - base_unit = x / total_weight - return [base_unit * weight for weight in weights] - - -def split_x_into_n_symmetrically(x: float, n: int, factors: list[float]) -> list[float]: - """Split X into N parts with symmetric weights controlled by factors.""" - weights = calculate_symmetric_weights(n, middle_weight=1, factors=factors) - return scale_to_total(x, weights) - - -def split_x_into_n_middle(x: float, n: int, middle_value: float) -> list[float]: - """Split X into N parts with symmetric weights using middle_value as peak.""" - weights = calculate_symmetric_weights(n, middle_weight=middle_value) - return scale_to_total(x, weights) diff --git a/python_pkg/split/tests/__init__.py b/python_pkg/split/tests/__init__.py deleted file mode 100644 index 3d67b07..0000000 --- a/python_pkg/split/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for split module.""" diff --git a/python_pkg/split/tests/test_split.py b/python_pkg/split/tests/test_split.py deleted file mode 100644 index 3e5f860..0000000 --- a/python_pkg/split/tests/test_split.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Unit tests for split_x_into_n_symmetrically module.""" - -import pytest - -from python_pkg.split.split_x_into_n_symmetrically import ( - calculate_symmetric_weights, - scale_to_total, - split_x_into_n_middle, - split_x_into_n_symmetrically, -) - - -class TestCalculateSymmetricWeights: - """Tests for calculate_symmetric_weights function.""" - - def test_odd_n_without_factors(self) -> None: - """Test odd N creates symmetric weights around middle.""" - weights = calculate_symmetric_weights(n=5, middle_weight=3) - # For n=5, half_n=2, should be symmetric around middle - assert len(weights) == 5 - # Check symmetry - assert weights[0] == weights[-1] - assert weights[1] == weights[-2] - - def test_even_n_without_factors(self) -> None: - """Test even N creates symmetric weights.""" - weights = calculate_symmetric_weights(n=4, middle_weight=2) - assert len(weights) == 4 - 
# Check symmetry - assert weights[0] == weights[-1] - assert weights[1] == weights[-2] - - def test_with_factors(self) -> None: - """Test custom factors are applied correctly.""" - weights = calculate_symmetric_weights(n=4, middle_weight=1, factors=[0.5, 0.3]) - # Factors control growth from middle, so we get 2 * len(factors) + mirrored - assert len(weights) == 6 # Actual behavior based on factors - # Check symmetry - assert weights[0] == weights[-1] - assert weights[1] == weights[-2] - - def test_n_equals_1(self) -> None: - """Test single part returns weights based on algorithm.""" - weights = calculate_symmetric_weights(n=1, middle_weight=5) - # Odd case with half_n=0: [middle_weight] reversed + middle + [middle_weight] - assert weights == [5, 5, 5] - - def test_n_equals_2(self) -> None: - """Test two parts returns two equal weights.""" - weights = calculate_symmetric_weights(n=2, middle_weight=3) - assert len(weights) == 2 - assert weights[0] == weights[1] - - -class TestScaleToTotal: - """Tests for scale_to_total function.""" - - def test_scale_to_total_basic(self) -> None: - """Test weights are scaled to sum to x.""" - weights = [1.0, 2.0, 1.0] - scaled = scale_to_total(x=100, weights=weights) - assert sum(scaled) == pytest.approx(100) - - def test_scale_preserves_proportions(self) -> None: - """Test scaling preserves relative proportions.""" - weights = [1.0, 2.0, 3.0] - scaled = scale_to_total(x=60, weights=weights) - # Original sum is 6, so each unit = 10 - assert scaled[0] == pytest.approx(10) - assert scaled[1] == pytest.approx(20) - assert scaled[2] == pytest.approx(30) - - def test_scale_with_floats(self) -> None: - """Test scaling works with float weights.""" - weights = [0.5, 1.0, 0.5] - scaled = scale_to_total(x=10, weights=weights) - assert sum(scaled) == pytest.approx(10) - - -class TestSplitXIntoNSymmetrically: - """Tests for split_x_into_n_symmetrically function.""" - - def test_split_basic(self) -> None: - """Test basic split with factors.""" - 
result = split_x_into_n_symmetrically(x=100, n=4, factors=[0.5, 0.2]) - # Length depends on factors, not just n - assert len(result) == 6 # Actual behavior - assert sum(result) == pytest.approx(100) - # Check symmetry - assert result[0] == pytest.approx(result[-1]) - assert result[1] == pytest.approx(result[-2]) - - def test_split_preserves_total(self) -> None: - """Test that the split preserves the total value.""" - result = split_x_into_n_symmetrically(x=1000, n=5, factors=[0.1, 0.2]) - assert sum(result) == pytest.approx(1000) - - -class TestSplitXIntoNMiddle: - """Tests for split_x_into_n_middle function.""" - - def test_split_middle_basic(self) -> None: - """Test basic split using middle value.""" - result = split_x_into_n_middle(x=100, n=3, middle_value=2) - assert len(result) == 3 - assert sum(result) == pytest.approx(100) - - def test_split_middle_symmetric(self) -> None: - """Test that result is symmetric.""" - result = split_x_into_n_middle(x=100, n=5, middle_value=3) - assert result[0] == pytest.approx(result[-1]) - assert result[1] == pytest.approx(result[-2]) - - def test_split_middle_even_parts(self) -> None: - """Test split with even number of parts.""" - result = split_x_into_n_middle(x=50, n=4, middle_value=1) - assert len(result) == 4 - assert sum(result) == pytest.approx(50) diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 09d9d85..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,191 +0,0 @@ -# ============================================================================== -# Python Development Dependencies - Linting, Formatting, and Testing -# ============================================================================== -# Install with: pip install -r requirements-dev.txt -# ============================================================================== - -# Include base requirements --r requirements.txt - -# add-trailing-comma - Add trailing commas -add-trailing-comma>=3.1.0 - -# autoflake - Remove 
unused imports and variables -autoflake>=2.2.0 - -# autopep8 - PEP 8 formatting (alternative formatter) -autopep8>=2.0.0 - -# ============================================================================== -# SECURITY LINTERS -# ============================================================================== - -# Bandit - Security linter -bandit>=1.7.0 - -# Black - The uncompromising code formatter (fallback/comparison) -black>=24.0.0 - -# ============================================================================== -# SPELL CHECKING -# ============================================================================== - -# codespell - Fix common misspellings -codespell>=2.2.0 - -# Coverage.py - Code coverage measurement -coverage>=7.4.0 - -# darglint - Check docstrings match function signatures -darglint>=1.8.0 - -# dead - Find dead code -dead>=1.5.0 - -# docformatter - Formats docstrings -docformatter>=1.7.0 - -# fixit - Auto-fix linting errors -fixit>=2.1.0 - -# Flake8 - Linting tool (wraps pyflakes, pycodestyle, mccabe) -flake8>=7.0.0 -flake8-annotations>=3.0.0 # Type annotation checks -flake8-bandit>=4.1.0 # Security checks via bandit - -# Flake8 plugins for maximum coverage -flake8-bugbear>=24.0.0 # Additional bug detection -flake8-comprehensions>=3.14.0 # Better list/dict/set comprehensions -flake8-docstrings>=1.7.0 # Docstring checks -flake8-eradicate>=1.5.0 # Dead code detection -flake8-pie>=0.16.0 # Miscellaneous lints -flake8-print>=5.0.0 # Detect print statements -flake8-pyi>=24.0.0 # Type stub file checks -flake8-pytest-style>=2.0.0 # Pytest style checks -flake8-return>=1.2.0 # Better return statement checks -flake8-simplify>=0.21.0 # Simplification suggestions - -# Hypothesis - Property-based testing -hypothesis>=6.98.0 - -# ============================================================================== -# IMPORT CHECKING -# ============================================================================== - -# importlib-metadata for import analysis 
-importlib-metadata>=7.0.0 - -# ============================================================================== -# DOCUMENTATION -# ============================================================================== - -# pep257 - PEP 257 docstring checker (legacy, use pydocstyle) -# interrogate - Check docstring coverage -interrogate>=1.5.0 - -# isort - Import sorting (ruff handles this, but useful standalone) -isort>=5.13.0 - -# mccabe - McCabe complexity checker -mccabe>=0.7.0 - -# ============================================================================== -# TYPE CHECKING -# ============================================================================== - -# MyPy - Static type checker -mypy>=1.8.0 - -# pip-audit - Audit Python packages for known vulnerabilities -pip-audit>=2.6.0 - -# pipdeptree - Show dependency tree -pipdeptree>=2.14.0 - -# ============================================================================== -# PRE-COMMIT -# ============================================================================== - -# pre-commit - Git hook management -pre-commit>=3.6.0 - -# prospector - Python static analysis tool -prospector>=1.10.0 - -# pycodestyle - Python style guide checker (PEP 8) -pycodestyle>=2.11.0 - -# pydocstyle - Docstring style checker (PEP 257) -pydocstyle>=6.3.0 - -# pyflakes - Passive checker of Python programs -pyflakes>=3.2.0 - -# pylama - Code audit tool (wraps multiple linters) -pylama>=8.4.0 - -# ============================================================================== -# LINTERS -# ============================================================================== - -# Pylint - Comprehensive Python linter -pylint>=3.0.0 - -# Pyright - Microsoft's type checker (very strict) -pyright>=1.1.350 - -# ============================================================================== -# TESTING -# ============================================================================== - -# pytest - Testing framework -pytest>=8.0.0 - -# pytest plugins 
-pytest-cov>=4.1.0 # Coverage plugin -pytest-randomly>=3.15.0 # Randomize test order -pytest-sugar>=1.0.0 # Better test output -pytest-timeout>=2.2.0 # Test timeouts -pytest-xdist>=3.5.0 # Parallel test execution - -# ============================================================================== -# ADDITIONAL TOOLS -# ============================================================================== - -# pyupgrade - Upgrade Python syntax -pyupgrade>=3.15.0 - -# Radon - Code metrics (complexity, maintainability) -radon>=6.0.0 - -# reorder-python-imports - Reorder imports -reorder-python-imports>=3.12.0 - -# ============================================================================== -# CODE FORMATTERS -# ============================================================================== - -# Ruff - Extremely fast Python linter and formatter (replaces many tools) -ruff>=0.8.0 - -# Safety - Check dependencies for known security vulnerabilities -safety>=2.3.0 -types-python-dateutil>=2.8.0 -types-PyYAML>=6.0.0 - -# Type stubs for common packages -types-requests>=2.31.0 -types-setuptools>=69.0.0 - -# ============================================================================== -# CODE QUALITY & DEAD CODE DETECTION -# ============================================================================== - -# Vulture - Find dead code -vulture>=2.10 - -# xenon - Monitor code complexity -xenon>=0.9.0 - -# yapf - Yet Another Python Formatter (Google's formatter) -yapf>=0.40.0 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 19b82a4..0000000 --- a/requirements.txt +++ /dev/null @@ -1,21 +0,0 @@ -aiohttp>=3.9 -beautifulsoup4>=4.0 -berserk>=0.13 -bottle>=0.12 -genanki>=0.13 -geopandas>=1.0 -howlongtobeatpy>=1.0 -lxml>=5.0 - -# Optional dependencies for specific scripts (needed for full pylint analysis) -matplotlib>=3.0 -mitmproxy>=10.0 -numpy>=1.20 -opencv-python>=4.0 -pillow>=10.0 -pygame>=2.0 -pytest>=7.0 -python-chess>=1.999 -requests>=2.0 -selenium>=4.0 
-websockets>=13.0 diff --git a/requirements.txt b/requirements.txt new file mode 120000 index 0000000..3daa3c2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +meta/requirements.txt \ No newline at end of file diff --git a/run.sh b/run.sh deleted file mode 100755 index f445898..0000000 --- a/run.sh +++ /dev/null @@ -1,148 +0,0 @@ -#!/bin/bash -# Easy entrypoint for system usage reports and polling script diagnostics. -# Usage: -# ./run.sh # today's report to stdout -# ./run.sh --date 20260501 # specific day -# ./run.sh --top 25 # override row count -# ./run.sh --profile [duration] # profile polling scripts (default 60s) -# ./run.sh --diagnose # find inefficient shell scripts -# ./run.sh --init-artifacts ... # bootstrap contract/evidence/session artifacts -# -# Any other args are forwarded to usage_report.py unchanged. - -set -euo pipefail - -SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" -REPORT_SCRIPT="$SCRIPT_DIR/linux_configuration/scripts/system-maintenance/bin/usage_report.py" -ARTIFACT_INIT_SCRIPT="$SCRIPT_DIR/scripts/init_agent_artifacts.sh" - -if [[ ! -f "$REPORT_SCRIPT" ]]; then - echo "Error: usage_report.py not found at: $REPORT_SCRIPT" >&2 - exit 1 -fi - -if [[ ! -f "$ARTIFACT_INIT_SCRIPT" ]]; then - echo "Error: init_agent_artifacts.sh not found at: $ARTIFACT_INIT_SCRIPT" >&2 - exit 1 -fi - -# Profiling mode: trace fork-heavy scripts over time -profile_polling_scripts() { - local duration="${1:-60}" - echo "=== Polling Script Profiler (${duration}s) ===" >&2 - echo "Tracing fork/exec calls in shell scripts..." >&2 - echo "" >&2 - - # Find common polling script processes and trace them - local trace_file="/tmp/polling_trace_$$.txt" - - # Use perf/strace to capture system calls - ( - timeout "$duration" strace -f -e trace=clone,execve -c -p $$ 2>&1 || true - ) > "$trace_file" 2>&1 - - echo "Trace completed. Analyzing results:" >&2 - echo "" >&2 - - # Show fork/exec heavy processes - if ! 
grep -e "execve" -e "clone" "$trace_file" | head -20; then - : - fi - - rm -f "$trace_file" -} - -# Diagnostic mode: find inefficient patterns in shell scripts -diagnose_polling_scripts() { - echo "=== Shell Script Efficiency Audit ===" >&2 - echo "" >&2 - - local issues_found=0 - - # Check for common anti-patterns - echo "Checking for anti-patterns in shell scripts..." >&2 - echo "" >&2 - - # Pattern 1: while true with sleep (no event-driven check) - echo "1. Polling loops (while true + sleep):" >&2 - set +e - grep -r "while true\|while :" --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ - | grep -v "Binary" | grep -v ".git" | head -5 - set -e - issues_found=$((issues_found + 1)) - echo "" >&2 - - # Pattern 2: $(date +...) calls in loops (fork-heavy) - echo "2. Excessive date calls (each forks a process):" >&2 - set +e - grep -r '\$(date' --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ - | grep -v "Binary" | grep -v ".git" | head -5 - set -e - issues_found=$((issues_found + 1)) - echo "" >&2 - - # Pattern 3: pgrep/xdotool in loops - echo "3. Process inspection in loops (pgrep, xdotool):" >&2 - set +e - grep -r "while.*pgrep\|while.*xdotool\|pgrep.*while" --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ - | grep -v "Binary" | grep -v ".git" | head -5 - set -e - issues_found=$((issues_found + 1)) - echo "" >&2 - - # Pattern 4: pipes in hot paths - echo "4. Heavy pipes in polling scripts (| awk, | grep, | tr):" >&2 - set +e - while_true_file_list="$(mktemp)" - heavy_pipe_matches="$(mktemp)" - grep -r "while true" --include="*.sh" "$SCRIPT_DIR" > "$while_true_file_list" 2>/dev/null - if [ -s "$while_true_file_list" ]; then - xargs grep -l -e " | awk" -e " | grep" -e " | tr" < "$while_true_file_list" > "$heavy_pipe_matches" 2>/dev/null - head -5 "$heavy_pipe_matches" - fi - rm -f "$while_true_file_list" "$heavy_pipe_matches" - set -e - issues_found=$((issues_found + 1)) - echo "" >&2 - - # Pattern 5: sleep with very short intervals - echo "5. 
Aggressive polling (sleep < 1s):" >&2 - set +e - grep -rE "sleep 0\.[0-9]|sleep 0[^0-9]" --include="*.sh" "$SCRIPT_DIR" 2>/dev/null \ - | grep -v "Binary" | grep -v ".git" | head -5 - set -e - issues_found=$((issues_found + 1)) - echo "" >&2 - - echo "=== Recommendations ===" >&2 - echo "1. Replace 'while true + sleep' with event-driven I/O (inotifywait, read -t, etc.)" >&2 - echo "2. Use /proc and /sys instead of forking date, sensors, acpi, etc." >&2 - echo "3. Cache frequently accessed values (e.g., in /tmp state files)" >&2 - echo "4. Use bash builtins: printf %()T instead of date, \${var//} instead of tr, etc." >&2 - echo "5. Use i3blocks interval=persist + event loop instead of polling mode" >&2 - echo "6. Increase polling intervals: 1s → 5s → 10s where acceptable" >&2 -} - -# Handle special modes -case "${1:-}" in - --profile) - profile_polling_scripts "${2:-60}" - exit 0 - ;; - --diagnose) - diagnose_polling_scripts - exit 0 - ;; - --init-artifacts) - shift - exec "$ARTIFACT_INIT_SCRIPT" "$@" - ;; - --help) - grep '^# Usage:' "$0" | sed 's/^# //' | head -1 - grep '^# ' "$0" | sed 's/^# / /' - exit 0 - ;; -esac - -# Default: run usage_report.py with all remaining args -exec python3 "$REPORT_SCRIPT" "$@" diff --git a/run.sh b/run.sh new file mode 120000 index 0000000..9b30594 --- /dev/null +++ b/run.sh @@ -0,0 +1 @@ +meta/run.sh \ No newline at end of file diff --git a/scripts/check_c_cpp_build_files.sh b/scripts/check_c_cpp_build_files.sh deleted file mode 100755 index d8a2bac..0000000 --- a/scripts/check_c_cpp_build_files.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash -# Check that every directory containing C/C++ source files has a Makefile and run.sh. -# Used as a pre-commit hook; receives staged file paths as arguments. 
- -set -uo pipefail - -errors=() -declare -A checked_dirs - -for file in "$@"; do - dir=$(dirname "$file") - - # Skip build directories and CMake artefact trees - if echo "$dir" | grep -qE '(^|/)build(/|$)'; then - continue - fi - - # Skip if already checked this directory - [[ -v checked_dirs["$dir"] ]] && continue - checked_dirs["$dir"]=1 - - # Check for Makefile (case-insensitive: Makefile or makefile) - if ! compgen -G "$dir/[Mm]akefile" > /dev/null 2>&1; then - errors+=("MISSING Makefile in: $dir") - fi - - # Check for run.sh - if [[ ! -f "$dir/run.sh" ]]; then - errors+=("MISSING run.sh in: $dir") - fi -done - -if [[ ${#errors[@]} -gt 0 ]]; then - printf 'C/C++ build file check failed:\n' - printf ' %s\n' "${errors[@]}" - printf '\nEvery directory with .c/.cpp files must have a Makefile and run.sh.\n' - exit 1 -fi - -exit 0 diff --git a/setup.sh b/setup.sh deleted file mode 100755 index 8768e24..0000000 --- a/setup.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -# Post-clone setup script for testsAndMisc repository. 
-# Run once after cloning: ./setup.sh - -set -euo pipefail - -repo_root="$(git rev-parse --show-toplevel)" -cd "$repo_root" - -printf 'Configuring git hooks path...\n' -git config core.hooksPath linux_configuration/.githooks -printf ' ✓ core.hooksPath set to linux_configuration/.githooks\n' - -# Check for C/C++ and shell lint tools (used by pre-commit hooks) -MISSING=() -for cmd in clang-format cppcheck flawfinder shellcheck node npx; do - command -v "$cmd" >/dev/null 2>&1 || MISSING+=("$cmd") -done - -if [[ ${#MISSING[@]} -gt 0 ]]; then - printf '\n⚠ Missing tools for pre-commit hooks: %s\n' "${MISSING[*]}" - if command -v pacman >/dev/null 2>&1; then - printf ' Install with: sudo pacman -S --needed %s\n' "${MISSING[*]}" - elif command -v apt-get >/dev/null 2>&1; then - printf ' Install with: sudo apt-get install %s\n' "${MISSING[*]}" - else - printf ' Please install: %s\n' "${MISSING[*]}" - fi -else - printf ' ✓ All lint tools available\n' -fi - -printf '\nSetup complete.\n'