chore: consolidate root configs into meta/, drop unused C dir + split/pdfCentered/geo_data

- Move pyproject.toml, .pre-commit-config.yaml, requirements.txt, run.sh,
  lint_python.sh, .fvmrc into meta/ with root symlinks preserving tool
  auto-discovery.
- Combine requirements.txt + requirements-dev.txt into meta/requirements.txt
  (single sorted source of truth).
- Remove setup.sh, .binary-allowlist, C/ (no native code remains),
  python_pkg/{split,pdfCentered,geo_data}, scripts/check_c_cpp_build_files.sh.
- Drop clang-format/cppcheck/flawfinder/check-c-cpp-build-files hooks and
  archived path excludes from pre-commit config.
- Add .secret-patterns to .gitignore and untrack it (sensitive content;
  full history purge is a follow-up step).
This commit is contained in:
Krzysztof kuhy Rudnicki 2026-05-14 20:39:46 +02:00
parent 84e5d39137
commit 89b4f59ce9
58 changed files with 1348 additions and 9539 deletions

View File

@ -1,3 +0,0 @@
# Binary files allowed in the repository.
# One glob pattern per line. These are essential for builds and cannot be external.
# Lines starting with # are comments.

3
.fvmrc
View File

@ -1,3 +0,0 @@
{
"flutter": "stable"
}

1
.fvmrc Symbolic link
View File

@ -0,0 +1 @@
meta/.fvmrc

3
.gitignore vendored
View File

@ -1,5 +1,8 @@
# See http://help.github.com/ignore-files/ for more about ignoring files.
# Sensitive — must NEVER be committed (contains regex of home GPS coordinates etc.)
.secret-patterns
# Compiled output
/dist
/tmp

View File

@ -1,449 +0,0 @@
# ==============================================================================
# Pre-commit Configuration - Multi-language Linting & Formatting
# ==============================================================================
# Install: pre-commit install && pre-commit install --hook-type pre-push
# Fast lint: pre-commit run --all-files (linters only, ~10 s)
# Full suite: pre-commit run --all-files --hook-stage pre-push (+ tests)
# Update hooks: pre-commit autoupdate
# ==============================================================================
# Global settings
default_language_version:
python: python3
# Keep running the remaining hooks after a failure so every error is reported (set to true to stop at the first failing hook)
fail_fast: false
# pre-commit.ci configuration (commit messages used for auto-fix and auto-update commits)
ci:
autofix_commit_msg: "style: auto-fix by pre-commit hooks"
autoupdate_commit_msg: "chore: update pre-commit hooks"
repos:
# ===========================================================================
# GENERAL HOOKS - File formatting and validation
# ===========================================================================
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
- id: check-yaml
args: [--unsafe]
- id: check-json
# Exclude JSONC files (VS Code configs, TypeScript configs) and compile_commands.json
exclude: ^(\.vscode/|.*/\.vscode/|C/compile_commands\.json|.*tsconfig.*\.json)
- id: check-toml
- id: check-xml
- id: check-added-large-files
args: [--maxkb=2000]
- id: check-merge-conflict
- id: check-case-conflict
- id: check-symlinks
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: detect-private-key
- id: debug-statements
- id: name-tests-test
args: [--pytest-test-first]
- id: check-ast
- id: check-builtin-literals
- id: check-docstring-first
- id: fix-byte-order-marker
- id: mixed-line-ending
args: [--fix=lf]
- id: requirements-txt-fixer
# ===========================================================================
# BINARY BLOCKER - Prevent binary/image files from being committed
# ===========================================================================
- repo: local
hooks:
- id: no-binaries
name: Block binary/image files
entry: scripts/check_no_binaries.sh
language: script
always_run: false
- id: ai-evidence-contract
name: Require AI evidence artifacts for code changes
entry: scripts/check_ai_evidence.sh
language: script
pass_filenames: false
always_run: true
- id: ai-multifile-contract
name: Require workflow contract for multi-file code changes
entry: scripts/check_agent_contract.sh
language: script
pass_filenames: false
always_run: true
- id: append-only-sessions
name: Enforce append-only session logs
entry: scripts/check_append_only_sessions.sh
language: script
pass_filenames: false
always_run: true
# ===========================================================================
# POLLING SCRIPT LINTER - Detect fork-storm anti-patterns in shell scripts
# ===========================================================================
- repo: local
hooks:
- id: no-polling-antipatterns
name: Block polling script anti-patterns
entry: scripts/check_polling_antipatterns.sh
language: script
types: [shell]
exclude: ^(\.git/|C/|CPP/|phone_focus_mode/lib/tests/|tests/)
# ===========================================================================
# NOQA BLOCKER - Zero tolerance for noqa/type:ignore suppression comments
# ===========================================================================
- repo: local
hooks:
- id: no-noqa
name: Block noqa comments
entry: '(?i)#\s*(noqa|type:\s*ignore)'
language: pygrep
types: [python]
- id: no-ruff-noqa
name: Block ruff noqa file-level comments
entry: '(?i)#\s*ruff:\s*noqa'
language: pygrep
types: [python]
# ===========================================================================
# RUFF - Fast Python linter and formatter (replaces black, isort, flake8, etc.)
# ===========================================================================
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.2
hooks:
# Linter - run first to catch issues
- id: ruff
args:
- --fix
- --unsafe-fixes
- --exit-non-zero-on-fix
- --show-fixes
types_or: [python, pyi]
# Formatter - run after linting
- id: ruff-format
types_or: [python, pyi]
# ===========================================================================
# MYPY - Static type checking (runs on push only for speed)
# ===========================================================================
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
hooks:
- id: mypy
stages: [pre-push]
args:
- --ignore-missing-imports
- --no-error-summary
- --disable-error-code=no-untyped-def
- --disable-error-code=no-untyped-call
- --disable-error-code=var-annotated
- --disable-error-code=no-any-unimported
- --disable-error-code=type-arg
- --disable-error-code=no-any-return
- --disable-error-code=misc
- --disable-error-code=unused-ignore
- --disable-error-code=unreachable
- --disable-error-code=assignment
- --disable-error-code=no-redef
- --disable-error-code=attr-defined
- --disable-error-code=arg-type
- --disable-error-code=union-attr
- --disable-error-code=call-overload
- --disable-error-code=return-value
- --disable-error-code=redundant-cast
- --disable-error-code=empty-body
- --disable-error-code=list-item
exclude: >-
(?x)^(
Bash/.*|
\.venv/.*|
linux_configuration/scripts/misc/testsAndMisc-bash/tools/.*
)$
additional_dependencies:
- types-requests
- types-PyYAML
- types-python-dateutil
# ===========================================================================
# PYLINT - Comprehensive Python linter (runs on push only for speed)
# ===========================================================================
- repo: https://github.com/pylint-dev/pylint
rev: v3.3.2
hooks:
- id: pylint
stages: [pre-push]
args:
- --rcfile=pyproject.toml
- --fail-under=8.0
- --jobs=0
additional_dependencies:
- pytest
- python-chess
- requests
- pygame
exclude: ^(Bash/|\.venv/)
# ===========================================================================
# BANDIT - Security linter (runs on push only for speed)
# ===========================================================================
- repo: https://github.com/PyCQA/bandit
rev: 1.7.10
hooks:
- id: bandit
stages: [pre-push]
args:
- -c
- pyproject.toml
- --severity-level=high
- --confidence-level=medium
- --skip=B113
additional_dependencies: ["bandit[toml]"]
exclude: ^(Bash/|\.venv/|tests/|.*test.*\.py$)
# ===========================================================================
# PYTEST + COVERAGE - Run tests and enforce 100% code coverage
# Only tests for subpackages with changed files are run (see script).
# Runs on push only (slow); use --hook-stage pre-push to run manually.
# ===========================================================================
- repo: local
hooks:
- id: pytest-coverage
name: pytest with coverage enforcement
entry: python scripts/pytest_changed_packages.py
language: system
types: [python]
pass_filenames: true
stages: [pre-push]
# ===========================================================================
# VULTURE - Dead code detection (disabled - doesn't work well with pre-commit)
# ===========================================================================
# - repo: https://github.com/jendrikseipp/vulture
# rev: v2.13
# hooks:
# - id: vulture
# args:
# - --min-confidence=80
# - --exclude=.venv,Bash,__pycache__
# exclude: ^(Bash/|\.venv/)
# ===========================================================================
# PYUPGRADE - Upgrade Python syntax (disabled - incompatible with Python 3.14)
# ===========================================================================
# - repo: https://github.com/asottile/pyupgrade
# rev: v3.19.0
# hooks:
# - id: pyupgrade
# args:
# - --py310-plus
# ===========================================================================
# CODESPELL - Spell checking in code (expanded ignore list for non-English)
# ===========================================================================
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
args:
- --skip=*.json,*.lock,*.min.js,*.min.css,.git,__pycache__,.venv,*.txt
- --ignore-words-list=als,ans,ect,nd,som,sur,te,nam,numer,lew,sie,wil,postion,clen,ther,folow,derrive,ony,tje,noe,theses,crate,doubleclick,wile,tabel,pary,blok,bloc,proces,serwer,parametr,adres,hart,dout,metod,tekst,synonim,grup,mosty,lokal,skalar,milion,nowe,tre,hel,alph
exclude: ^(Bash/ffmpeg-build/|LaTeX/|CPP/|.*\.geojson$)
# ===========================================================================
# DOCFORMATTER - Format docstrings (disabled - causes recursion errors)
# ===========================================================================
# - repo: local
# hooks:
# - id: docformatter
# name: docformatter
# entry: docformatter
# language: system
# types: [python]
# args:
# - --in-place
# - --wrap-summaries=88
# - --wrap-descriptions=88
# ===========================================================================
# INTERROGATE - Docstring coverage (disabled - causes recursion on large files)
# ===========================================================================
# - repo: https://github.com/econchick/interrogate
# rev: 1.7.0
# hooks:
# - id: interrogate
# args:
# - --fail-under=0
# - --verbose
# - --ignore-init-method
# - --ignore-init-module
# - --ignore-magic
# - --ignore-private
# - --ignore-semiprivate
# - --exclude=Bash,.venv,__pycache__
# pass_filenames: false
# ===========================================================================
# AUTOFLAKE - Remove unused imports/variables
# Disabled: fully redundant with ruff (F401, F841, F811) + --fix
# ===========================================================================
# - repo: https://github.com/PyCQA/autoflake
# rev: v2.3.1
# hooks:
# - id: autoflake
# args:
# - --in-place
# - --remove-all-unused-imports
# - --remove-unused-variables
# - --remove-duplicate-keys
# - --expand-star-imports
# ===========================================================================
# SAFETY - Check for security vulnerabilities in dependencies
# ===========================================================================
# Note: Safety requires API key for full functionality, disabled by default
# - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
# rev: v1.3.2
# hooks:
# - id: python-safety-dependencies-check
# files: requirements.*\.txt$
# ===========================================================================
# PYRIGHT - Microsoft's type checker (very strict, optional)
# ===========================================================================
# Uncomment to enable - can be slow and very strict
# - repo: https://github.com/RobertCraigie/pyright-python
# rev: v1.1.350
# hooks:
# - id: pyright
# ===========================================================================
# PRETTIER - JSON/YAML/Markdown formatting (runs on push only — slow Node.js startup)
# ===========================================================================
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
types_or: [yaml, json, markdown]
exclude: ^(Bash/|\.venv/|.*\.lock$|C/compile_commands\.json)
stages: [pre-push]
# ===========================================================================
# SHELLCHECK - Shell script linting
# Wrapper batches files to avoid OOM on large repos.
# ===========================================================================
- repo: local
hooks:
- id: shellcheck
name: shellcheck
entry: bash -c 'printf "%s\0" "$@" | xargs -0 -n 40 shellcheck --severity=warning' --
language: system
types: [shell]
# ===========================================================================
# CLANG-FORMAT - C/C++ code formatting
# ===========================================================================
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.6
hooks:
- id: clang-format
types_or: [c, c++]
# ===========================================================================
# CPPCHECK - C/C++ static analysis
# ===========================================================================
- repo: local
hooks:
- id: cppcheck
name: cppcheck
entry: cppcheck
language: system
types_or: [c, c++]
args:
- --enable=warning,portability
- --force
- --quiet
- --error-exitcode=1
- --inline-suppr
- --suppress=missingIncludeSystem
- --suppress=syntaxError
- --suppress=nullPointerOutOfResources
- --suppress=ctunullpointerOutOfResources
- --suppress=ctunullpointerOutOfMemory
- --std=c11
# ===========================================================================
# FLAWFINDER - C/C++ security scanner
# ===========================================================================
- repo: local
hooks:
- id: flawfinder
name: flawfinder
entry: flawfinder
language: system
types_or: [c, c++]
args:
- --error-level=5
- --quiet
- --columns
# ===========================================================================
# CHECK C/C++ BUILD FILES - Ensure every C/C++ dir has Makefile and run.sh
# ===========================================================================
- repo: local
hooks:
- id: check-c-cpp-build-files
name: check C/C++ dirs have Makefile and run.sh
entry: scripts/check_c_cpp_build_files.sh
language: script
types_or: [c, c++]
# ===========================================================================
# CHECK PYTHON LOCATION - All Python files must be under python_pkg/
# ===========================================================================
- repo: local
hooks:
- id: check-python-location
name: check Python files are under python_pkg/
entry: scripts/check_python_location.sh
language: script
types: [python]
# ===========================================================================
# REMOVE EMPTY DIRECTORIES - Clean up empty folders in the repo
# ===========================================================================
- repo: local
hooks:
- id: remove-empty-dirs
name: remove empty directories
entry: find . -type d -empty -not -path './.git/*' -delete -print
language: system
pass_filenames: false
always_run: true
# ===========================================================================
# SECRET PATTERNS - Block commits containing sensitive data
# ===========================================================================
- repo: local
hooks:
- id: check-no-secrets
name: check for leaked secrets
entry: scripts/check_no_secrets.sh
language: script
exclude: ^(\.secret-patterns|\.pre-commit-config\.yaml|.*\.geojson)$
# ===========================================================================
# COMMITIZEN - Conventional commits (optional)
# ===========================================================================
# - repo: https://github.com/commitizen-tools/commitizen
# rev: v3.13.0
# hooks:
# - id: commitizen
# - id: commitizen-branch
# stages: [pre-push]

1
.pre-commit-config.yaml Symbolic link
View File

@ -0,0 +1 @@
meta/.pre-commit-config.yaml

View File

@ -1,10 +0,0 @@
BasedOnStyle: LLVM
IndentWidth: 4
TabWidth: 4
UseTab: Never
ColumnLimit: 100
SortIncludes: true
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: true
AllowShortIfStatementsOnASingleLine: false
BreakBeforeBraces: Allman

View File

@ -1,18 +0,0 @@
Checks: >
clang-analyzer-*,
-clang-analyzer-security.*,
bugprone-*,
cert-err33-c,
cert-err34-c,
cert-fio38-c,
performance-*,
portability-*,
misc-unused-parameters
WarningsAsErrors: >
clang-analyzer-*,
bugprone-*,
cert-err33-c,
cert-err34-c,
cert-fio38-c
HeaderFilterRegex: ".*"
FormatStyle: none

1
C/.gitignore vendored
View File

@ -1 +0,0 @@
random_engine

View File

@ -1,33 +0,0 @@
# Build rules for the atop_agg aggregator and its unit-test binary.
CC := gcc
# Flags for the optimised release binary.
CFLAGS := -O2 -std=c11 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Wno-unused-parameter
# Flags for the test binary: unoptimised, gcov-instrumented, and with
# ATOP_AGG_NO_MAIN defined so atop_agg.c does not contribute its own main().
COV := -O0 -g --coverage -std=c11 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Wno-unused-parameter -DATOP_AGG_NO_MAIN
SRC := atop_agg.c
HDR := atop_agg.h
BIN := atop_agg
.PHONY: all clean rebuild test coverage
# Default target: build the release binary.
all: $(BIN)
$(BIN): $(SRC) $(HDR)
$(CC) $(CFLAGS) -o $@ $(SRC)
# Test binary links the tests together with the aggregator sources.
test_atop_agg: test_atop_agg.c atop_agg.c atop_agg.h
$(CC) $(COV) -o test_atop_agg test_atop_agg.c atop_agg.c
# Run the unit tests.
test: test_atop_agg
./test_atop_agg
# Run the unit tests, then produce an lcov HTML report that excludes the
# test driver itself.
coverage: test_atop_agg
./test_atop_agg
lcov --capture --directory . --output-file coverage.info --no-external
lcov --remove coverage.info '*/test_atop_agg.c' --output-file coverage.info
genhtml coverage.info --output-directory coverage_html
@echo "Coverage report at coverage_html/index.html"
# Remove binaries, objects, and all coverage artefacts.
clean:
rm -f $(BIN) test_atop_agg *.o *.gcda *.gcno coverage.info
rm -rf coverage_html
# Force a from-scratch release build.
rebuild: clean all

View File

@ -1,474 +0,0 @@
/*
* atop_agg fast per-PID aggregator for `atop -P PRC,PRM` output.
*
* Reads atop parseable output on stdin, folds it into per-PID CPU-tick
* and RSS trackers, and prints a compact TSV summary on stdout that a
* higher-level driver (Python) then name-folds into human-readable
* tables. This avoids the ~3s Python parse cost on a typical day's
* 1.7M-line atop dump; the C hot loop completes in well under a second
* so the pipeline runs at atop's own ~2s wall-clock floor.
*
* Output TSV lines (one W line, then one N line per distinct process name):
* W<TAB>start_epoch<TAB>end_epoch<TAB>distinct_samples<TAB>median_interval
* N<TAB>name<TAB>cpu_ticks<TAB>peak_kb<TAB>sum_avg_kb<TAB>rss_samples<TAB>pids
*/
#include "atop_agg.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
/*
* A real-world day of atop on a dev box can see >700k distinct PIDs
* because every short-lived compiler/shell subprocess gets a fresh ID.
* 2M slots keeps the load factor below ~40% for that workload, keeping
* linear-probe chains short without dynamic resizing.
*/
#define HASH_CAP_BITS 21
#define HASH_CAP (1u << HASH_CAP_BITS)
#define HASH_MASK (HASH_CAP - 1u)
#define MAX_EPOCHS 4096
#define MAX_TOKENS 64
/*
 * Map a PID to its starting index in the open-addressed tables.
 *
 * Knuth multiplicative hashing: multiply by 2654435761 and keep the top
 * HASH_CAP_BITS bits of the product. The shift amount assumes a 32-bit
 * unsigned int.
 */
static unsigned int hash_pid(int pid)
{
    const unsigned int shift = 32 - HASH_CAP_BITS;
    const unsigned int product = (unsigned int)pid * 2654435761u;
    return product >> shift;
}
/*
 * Return the CPU tracker for *pid*, claiming a fresh slot on first sight.
 *
 * Linear probing starts at hash_pid(pid). A slot whose pid field is 0 is
 * free; when one is reached before a match, it is initialised for *pid*
 * (first_ticks = -1 marks "no sample recorded yet") and returned.
 * Returns NULL once all HASH_CAP slots have been probed without success.
 */
static PidCpu *cpu_slot(State *s, int pid)
{
    unsigned int idx = hash_pid(pid);
    unsigned int remaining = HASH_CAP;

    while (remaining-- > 0)
    {
        PidCpu *entry = &s->cpu[idx & HASH_MASK];
        if (entry->pid == pid)
        {
            return entry;
        }
        if (entry->pid == 0)
        {
            entry->pid = pid;
            entry->first_ticks = -1;
            entry->last_ticks = 0;
            entry->samples = 0;
            entry->name[0] = '\0';
            return entry;
        }
        idx++;
    }

    /* Every slot is occupied by another PID: the caller drops the sample. */
    return NULL;
}
/*
 * Return the RSS tracker for *pid*, claiming a fresh slot on first sight.
 *
 * Same probing scheme as the CPU table: start at hash_pid(pid), walk
 * forward with wrap-around, and treat a slot whose pid field is 0 as free.
 * A newly claimed slot starts with zeroed counters and an empty name.
 * Returns NULL once all HASH_CAP slots have been probed without success.
 */
static PidRam *ram_slot(State *s, int pid)
{
    unsigned int idx = hash_pid(pid);
    unsigned int remaining = HASH_CAP;

    while (remaining-- > 0)
    {
        PidRam *entry = &s->ram[idx & HASH_MASK];
        if (entry->pid == pid)
        {
            return entry;
        }
        if (entry->pid == 0)
        {
            entry->pid = pid;
            entry->peak_kb = 0;
            entry->sum_kb = 0;
            entry->samples = 0;
            entry->name[0] = '\0';
            return entry;
        }
        idx++;
    }

    return NULL;
}
/*
 * Record *epoch* in the set of distinct sample timestamps held by *s*.
 *
 * The set is scanned newest-first. This function is called once per PRC
 * line, and consecutive lines presumably belong to the sample that was
 * added most recently (atop replaying samples in order -- confirm), so the
 * usual duplicate is rejected after a single comparison instead of after
 * walking every earlier epoch. The outcome is the same for any input
 * order.
 *
 * Once MAX_EPOCHS distinct values are stored, further new epochs are
 * dropped.
 */
static void add_epoch(State *s, long epoch)
{
    for (int i = s->n_epochs - 1; i >= 0; i--)
    {
        if (s->epochs[i] == epoch)
        {
            return;
        }
    }
    if (s->n_epochs < MAX_EPOCHS)
    {
        s->epochs[s->n_epochs++] = epoch;
    }
}
/*
 * Tokenise a whitespace-separated line in place.
 *
 * Fills *tokens* with pointers into *line* (at most *max_tokens* of them)
 * and returns the token count. Separators are spaces and tabs; a newline
 * ends the line. Each token is NUL-terminated by overwriting the separator
 * that follows it, so *line* is modified.
 *
 * A token that starts with '(' is treated as a parenthesised process
 * name: everything up to and including the first ')' is kept, interior
 * spaces included (atop emits `(Web Content)` as a single field). The
 * token then continues until the next separator, so a name that itself
 * contains parentheses, such as `((sd-pam))`, keeps its trailing
 * characters instead of having the byte after the first ')' overwritten.
 * An unclosed '(' stops at the newline rather than absorbing it.
 */
int tokenize_line(char *line, char **tokens, int max_tokens)
{
    int count = 0;
    char *p = line;

    while (*p && count < max_tokens)
    {
        /* Skip separators in front of the next token. */
        while (*p == ' ' || *p == '\t')
        {
            p++;
        }
        if (!*p || *p == '\n')
        {
            break;
        }

        char *start = p;
        if (*p == '(')
        {
            /* Consume through the first ')', preserving interior spaces. */
            while (*p && *p != ')' && *p != '\n')
            {
                p++;
            }
            if (*p == ')')
            {
                p++;
            }
        }
        /* Plain token, or whatever follows the ')' up to the separator. */
        while (*p && *p != ' ' && *p != '\t' && *p != '\n')
        {
            p++;
        }

        /* Only a separator or newline can be here, so overwriting is safe. */
        if (*p)
        {
            *p = '\0';
            p++;
        }
        tokens[count++] = start;
    }
    return count;
}
/*
 * Copy *src* into *dst* (capacity *cap* bytes, terminator included).
 *
 * A leading '(' and trailing ')' are stripped when both are present. If
 * the resulting name is empty, "unknown" is written instead. The result
 * is truncated to cap - 1 characters and always NUL-terminated.
 *
 * When *cap* is 0 there is no room even for the terminator, so *dst* is
 * left untouched; without this guard cap - 1 would wrap around and the
 * copy would run far past the buffer.
 */
void copy_name(char *dst, size_t cap, const char *src)
{
    if (cap == 0)
    {
        return;
    }

    size_t len = strlen(src);
    if (len >= 2 && src[0] == '(' && src[len - 1] == ')')
    {
        /* Drop the surrounding parentheses. */
        src++;
        len -= 2;
    }
    if (len == 0)
    {
        src = "unknown";
        len = strlen(src);
    }
    if (len >= cap)
    {
        len = cap - 1;
    }
    memcpy(dst, src, len);
    dst[len] = '\0';
}
/*
 * Parse one atop parseable-output line and fold it into *s*.
 *
 * Only "PRC" and "PRM" records are handled. Lines with fewer than 11
 * tokens, other labels, or a non-positive PID are silently skipped, which
 * also covers header and separator lines. *line* is tokenised in place.
 *
 * Token layout used: [2] epoch, [6] pid, [7] process name.
 *   PRC: ticks = [9] + [10]. The epoch is recorded, the first and latest
 *        tick totals are tracked per PID, and the sample count is bumped.
 *   PRM: needs at least 12 tokens; [11] is taken as the resident size in
 *        kB, folded into the per-PID peak and running sum.
 *
 * NOTE(review): atop(1) appears to list the PRC fields after the state
 * column as clock-ticks-per-second, user ticks, system ticks, which would
 * put utime/stime at [10]/[11] rather than [9]/[10] -- confirm against
 * the atop version in use before relying on the absolute values.
 */
void process_line(char *line, State *s)
{
    char *tok[MAX_TOKENS];
    const int ntok = tokenize_line(line, tok, MAX_TOKENS);
    if (ntok < 11)
    {
        return;
    }

    const int is_prc = (strcmp(tok[0], "PRC") == 0);
    const int is_prm = (strcmp(tok[0], "PRM") == 0);
    if (!(is_prc || is_prm))
    {
        return;
    }

    const long epoch = strtol(tok[2], NULL, 10);
    const int pid = (int)strtol(tok[6], NULL, 10);
    if (pid <= 0)
    {
        return;
    }
    const char *raw_name = tok[7];

    if (is_prc)
    {
        const long user_ticks = strtol(tok[9], NULL, 10);
        const long sys_ticks = strtol(tok[10], NULL, 10);
        const long total = user_ticks + sys_ticks;

        /* The epoch is recorded even if the PID table turns out to be full. */
        add_epoch(s, epoch);

        PidCpu *cpu = cpu_slot(s, pid);
        if (cpu != NULL)
        {
            if (cpu->first_ticks < 0)
            {
                cpu->first_ticks = total;
            }
            cpu->last_ticks = total;
            cpu->samples++;
            copy_name(cpu->name, sizeof(cpu->name), raw_name);
        }
        return;
    }

    /* PRM record: one more column is required than for PRC. */
    if (ntok < 12)
    {
        return;
    }
    const long rss_kb = strtol(tok[11], NULL, 10);

    PidRam *ram = ram_slot(s, pid);
    if (ram != NULL)
    {
        if (rss_kb > ram->peak_kb)
        {
            ram->peak_kb = rss_kb;
        }
        ram->sum_kb += rss_kb;
        ram->samples++;
        copy_name(ram->name, sizeof(ram->name), raw_name);
    }
}
/*
 * qsort comparator for long values, ascending order.
 * Returns -1, 0 or 1; the comparison form avoids the overflow a plain
 * subtraction could hit.
 */
static int cmp_long(const void *a, const void *b)
{
    const long lhs = *(const long *)a;
    const long rhs = *(const long *)b;
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * FNV-1a hash over a NUL-terminated string, used to key the name table.
 * Starts from the offset basis 2166136261 and, for each byte, XORs it in
 * and multiplies by the prime 16777619.
 */
static unsigned int fnv1a(const char *s)
{
    unsigned int hash = 2166136261u;
    for (const unsigned char *p = (const unsigned char *)s; *p != '\0'; p++)
    {
        hash = (hash ^ *p) * 16777619u;
    }
    return hash;
}
/*
* Per-name aggregate, built in a second pass over cpu/ram tables so that
* the caller only has to parse a few thousand output rows instead of one
* row per PID. The name table is deliberately oversized (64k slots for an
* expected few-thousand names) to keep linear-probe chains short.
*/
#define NAME_CAP_BITS 16
#define NAME_CAP (1u << NAME_CAP_BITS)
#define NAME_MASK (NAME_CAP - 1u)
/* One name-table slot: totals for every PID that shares a process name. */
typedef struct
{
/* Process name key, NUL-terminated. */
char name[ATOP_AGG_NAME_MAX];
/* Sum of the per-PID CPU tick deltas. */
long cpu_ticks;
/* Number of CPU-table PIDs folded into this name. */
int cpu_pids;
/* Largest per-PID peak RSS (kB) seen under this name. */
long peak_kb;
/* Sum over PIDs of each PID's average RSS (kB). */
long sum_avg_kb;
/* Incremented once per RAM-table PID (not once per PRM sample). */
int rss_samples;
/* Number of RAM-table PIDs folded into this name. */
int ram_pids;
/* Non-zero once the slot has been claimed. */
char used;
} NameAgg;
/*
 * Return the aggregate slot for *name* in *table*, claiming the first
 * unused slot on the probe path when the name has not been seen yet.
 *
 * Probing is linear from fnv1a(name) with wrap-around. A claimed slot
 * stores at most sizeof(name) - 1 characters of *name*. Returns NULL when
 * all NAME_CAP slots hold other names.
 */
static NameAgg *name_slot(NameAgg *table, const char *name)
{
    unsigned int idx = fnv1a(name);

    for (unsigned int tries = NAME_CAP; tries > 0; tries--, idx++)
    {
        NameAgg *entry = &table[idx & NAME_MASK];
        if (entry->used)
        {
            if (strcmp(entry->name, name) == 0)
            {
                return entry;
            }
            continue;
        }

        /* Unused slot: claim it and store a bounded copy of the key. */
        entry->used = 1;
        const size_t limit = sizeof(entry->name) - 1;
        size_t len = 0;
        for (; len < limit && name[len] != '\0'; len++)
        {
            entry->name[len] = name[len];
        }
        entry->name[len] = '\0';
        return entry;
    }

    return NULL;
}
/*
 * Write the aggregated summary for *s* to *out* as TSV.
 *
 * First a window line:
 *   W <TAB> first epoch <TAB> last epoch <TAB> distinct epochs <TAB> median gap
 * The stored epochs are sorted in place; the median is taken over the
 * gaps between neighbouring epochs and stays 0 with fewer than two epochs.
 *
 * Then one line per distinct process name:
 *   N <TAB> name <TAB> cpu_ticks <TAB> peak_kb <TAB> sum_avg_kb
 *     <TAB> rss_samples <TAB> pids
 * where pids is the larger of the CPU-side and RAM-side PID counts.
 *
 * If the name table cannot be allocated, only the W line is written.
 */
void emit_results(State *s, FILE *out)
{
    const int n_epochs = s->n_epochs;
    long first = 0;
    long last = 0;
    long median_gap = 0;

    if (n_epochs > 0)
    {
        qsort(s->epochs, (size_t)n_epochs, sizeof(long), cmp_long);
        first = s->epochs[0];
        last = s->epochs[n_epochs - 1];
        if (n_epochs >= 2)
        {
            const int n_gaps = n_epochs - 1;
            long gaps[MAX_EPOCHS];
            for (int i = 0; i < n_gaps; i++)
            {
                gaps[i] = s->epochs[i + 1] - s->epochs[i];
            }
            qsort(gaps, (size_t)n_gaps, sizeof(long), cmp_long);
            median_gap = gaps[n_gaps / 2];
        }
    }
    fprintf(out, "W\t%ld\t%ld\t%d\t%ld\n", first, last, n_epochs, median_gap);

    NameAgg *names = calloc(NAME_CAP, sizeof(NameAgg));
    if (names == NULL)
    {
        return;
    }

    /* Pass 1: fold per-PID CPU ticks into their process name. */
    for (unsigned int i = 0; i < HASH_CAP; i++)
    {
        const PidCpu *cpu = &s->cpu[i];
        if (cpu->pid == 0)
        {
            continue;
        }
        /* A PID seen once contributes its only reading; otherwise the
           first-to-last difference, clamped at 0. */
        long delta = cpu->last_ticks;
        if (cpu->samples >= 2)
        {
            delta = cpu->last_ticks - cpu->first_ticks;
            if (delta < 0)
            {
                delta = 0;
            }
        }
        NameAgg *agg = name_slot(names, cpu->name);
        if (agg != NULL)
        {
            agg->cpu_ticks += delta;
            agg->cpu_pids++;
        }
    }

    /* Pass 2: fold per-PID RSS statistics into their process name. */
    for (unsigned int i = 0; i < HASH_CAP; i++)
    {
        const PidRam *ram = &s->ram[i];
        if (ram->pid == 0)
        {
            continue;
        }
        long avg_kb = 0;
        if (ram->samples)
        {
            avg_kb = ram->sum_kb / ram->samples;
        }
        NameAgg *agg = name_slot(names, ram->name);
        if (agg != NULL)
        {
            if (ram->peak_kb > agg->peak_kb)
            {
                agg->peak_kb = ram->peak_kb;
            }
            agg->sum_avg_kb += avg_kb;
            agg->rss_samples++;
            agg->ram_pids++;
        }
    }

    /* Pass 3: print every claimed name slot in table order. */
    for (unsigned int i = 0; i < NAME_CAP; i++)
    {
        const NameAgg *agg = &names[i];
        if (agg->used)
        {
            int pids = agg->ram_pids;
            if (agg->cpu_pids > agg->ram_pids)
            {
                pids = agg->cpu_pids;
            }
            fprintf(out, "N\t%s\t%ld\t%ld\t%ld\t%d\t%d\n", agg->name, agg->cpu_ticks,
                    agg->peak_kb, agg->sum_avg_kb, agg->rss_samples, pids);
        }
    }

    free(names);
}
/*
 * Allocate a zero-initialised State together with its CPU table, RAM
 * table (HASH_CAP slots each) and epoch array (MAX_EPOCHS entries).
 * Returns NULL if any allocation fails; partial allocations are released.
 */
State *state_new(void)
{
    State *st = calloc(1, sizeof(*st));
    if (st == NULL)
    {
        return NULL;
    }

    st->cpu = calloc(HASH_CAP, sizeof(*st->cpu));
    st->ram = calloc(HASH_CAP, sizeof(*st->ram));
    st->epochs = calloc(MAX_EPOCHS, sizeof(*st->epochs));
    if (st->cpu != NULL && st->ram != NULL && st->epochs != NULL)
    {
        st->n_epochs = 0;
        return st;
    }

    state_free(st);
    return NULL;
}
/* Release a State and the three arrays it owns. Passing NULL is a no-op. */
void state_free(State *s)
{
    if (s != NULL)
    {
        free(s->epochs);
        free(s->ram);
        free(s->cpu);
        free(s);
    }
}
#ifndef ATOP_AGG_NO_MAIN
/*
 * Entry point: feed every stdin line through process_line(), then print
 * the summary on stdout. Exits with status 1 if the state cannot be
 * allocated. Compiled out when ATOP_AGG_NO_MAIN is defined so the test
 * binary can supply its own main().
 */
int main(void)
{
    State *state = state_new();
    if (state == NULL)
    {
        fprintf(stderr, "atop_agg: out of memory\n");
        return 1;
    }

    /* getline() grows the buffer as needed; it is reused for each line. */
    char *buffer = NULL;
    size_t buffer_cap = 0;
    while (getline(&buffer, &buffer_cap, stdin) != -1)
    {
        process_line(buffer, state);
    }
    free(buffer);

    emit_results(state, stdout);
    state_free(state);
    return 0;
}
#endif

View File

@ -1,42 +0,0 @@
#ifndef ATOP_AGG_H
#define ATOP_AGG_H
#include <stdio.h>
/* NAME_MAX capped to keep slot size compact; typical atop comm is 15 chars. */
#define ATOP_AGG_NAME_MAX 40
/* Per-PID CPU tracker; one slot of the open-addressed CPU hash table. */
typedef struct
{
/* Process ID owning the slot; 0 marks the slot as unused. */
int pid;
/* Process name copied from the most recent PRC line for this PID. */
char name[ATOP_AGG_NAME_MAX];
/* Tick total from the first PRC line seen; -1 until one is recorded. */
long first_ticks;
/* Tick total from the most recent PRC line. */
long last_ticks;
/* Number of PRC lines folded into this slot. */
int samples;
} PidCpu;
/* Per-PID RSS tracker; one slot of the open-addressed RAM hash table. */
typedef struct
{
/* Process ID owning the slot; 0 marks the slot as unused. */
int pid;
/* Process name copied from the most recent PRM line for this PID. */
char name[ATOP_AGG_NAME_MAX];
/* Largest resident size (kB) seen across this PID's PRM lines. */
long peak_kb;
/* Running sum of resident sizes (kB); divided by samples for the average. */
long sum_kb;
/* Number of PRM lines folded into this slot. */
int samples;
} PidRam;
/* Whole-run aggregation state; create with state_new(), release with state_free(). */
typedef struct
{
/* CPU hash table, indexed by hashed PID. */
PidCpu *cpu;
/* RAM hash table, indexed by hashed PID. */
PidRam *ram;
/* Distinct sample epochs in insertion order; emit_results() sorts them in place. */
long *epochs;
/* Number of valid entries in epochs. */
int n_epochs;
} State;
State *state_new(void);
void state_free(State *s);
int tokenize_line(char *line, char **tokens, int max_tokens);
void copy_name(char *dst, size_t cap, const char *src);
void process_line(char *line, State *s);
void emit_results(State *s, FILE *out);
#endif

View File

@ -1,12 +0,0 @@
#!/usr/bin/env bash
# Build and demo atop_agg on today's atop log.
#
# Usage: run.sh [ATOP_LOG]
#   ATOP_LOG  raw atop log to replay; defaults to
#             /var/log/atop/atop_YYYYMMDD for the current date.
set -euo pipefail

LOG="${1:-/var/log/atop/atop_$(date +%Y%m%d)}"
# Resolve a relative path against the caller's directory *before* changing
# into the script directory, otherwise it would point at the wrong file.
if [[ "$LOG" != /* ]]; then
  LOG="$PWD/$LOG"
fi

cd -- "$(dirname -- "$0")"
make

if [[ ! -f "$LOG" ]]; then
  echo "No atop log at $LOG; pass a path as arg 1." >&2
  exit 1
fi
echo "Aggregating $LOG ..." >&2

# head exits after 20 lines, which can kill the upstream commands with
# SIGPIPE. Under pipefail that surfaces as status 141, which is expected
# here and must not fail the demo; any other failure is still reported.
status=0
atop -r "$LOG" -P PRC,PRM | ./atop_agg | head -20 || status=$?
if ((status != 0 && status != 141)); then
  exit "$status"
fi

View File

@ -1,226 +0,0 @@
/*
* Unit tests for atop_agg helpers. Compiled with --coverage; aims for
* 100% line coverage of atop_agg.c (excluding main, which is guarded
* by -DATOP_AGG_NO_MAIN).
*/
#include "atop_agg.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int failures = 0;
#define CHECK(cond) \
do \
{ \
if (!(cond)) \
{ \
fprintf(stderr, "FAIL %s:%d: %s\n", __FILE__, __LINE__, #cond); \
failures++; \
} \
} while (0)
static void test_copy_name(void)
{
char buf[16];
copy_name(buf, sizeof(buf), "(bash)");
CHECK(strcmp(buf, "bash") == 0);
copy_name(buf, sizeof(buf), "bash");
CHECK(strcmp(buf, "bash") == 0);
copy_name(buf, sizeof(buf), "()");
CHECK(strcmp(buf, "unknown") == 0);
copy_name(buf, sizeof(buf), "");
CHECK(strcmp(buf, "unknown") == 0);
/* Truncation. */
copy_name(buf, sizeof(buf), "(veryverylongnameabc)");
CHECK(strlen(buf) == sizeof(buf) - 1);
/* Fallback truncation: buf too small for "unknown" itself. */
char tiny[4];
copy_name(tiny, sizeof(tiny), "");
CHECK(strcmp(tiny, "unk") == 0);
}
/*
 * tokenize_line(): plain fields, multi-word parenthesised names, blank
 * input, the max_tokens cap, and an unclosed parenthesis.
 *
 * Token text and counts are compared exactly, so an unterminated token or
 * a stray extra token cannot slip through a prefix or lower-bound check.
 */
static void test_tokenize(void)
{
    char line[] = "PRC host 1000 2026/01/01 12:00:00 600 123 (bash) S 10 20\n";
    char *toks[32];
    int n = tokenize_line(line, toks, 32);
    CHECK(n == 11);
    CHECK(strcmp(toks[0], "PRC") == 0);
    CHECK(strcmp(toks[7], "(bash)") == 0);
    CHECK(strcmp(toks[10], "20") == 0);

    /* Multi-word parenthesised name stays one token. */
    char line2[] = "PRM host 1000 d t 600 200 (Web Content) S 4096 1 2 0 0\n";
    char *t2[32];
    int n2 = tokenize_line(line2, t2, 32);
    CHECK(n2 == 14);
    CHECK(strcmp(t2[7], "(Web Content)") == 0);
    CHECK(strcmp(t2[8], "S") == 0);
    CHECK(strcmp(t2[13], "0") == 0);

    /* Empty / whitespace-only line. */
    char empty[] = " \n";
    char *t3[4];
    CHECK(tokenize_line(empty, t3, 4) == 0);

    /* Max-tokens cap respected. */
    char big[] = "a b c d e f g h i j k";
    char *t4[3];
    CHECK(tokenize_line(big, t4, 3) == 3);

    /* Unclosed paren at EOL: consumed to end. */
    char unclosed[] = "(abc";
    char *t5[2];
    int n5 = tokenize_line(unclosed, t5, 2);
    CHECK(n5 == 1);
    CHECK(strcmp(t5[0], "(abc") == 0);
}
/*
 * End-to-end check: feed PRC/PRM records plus assorted lines that must be
 * ignored through process_line(), then verify the W and N rows written
 * by emit_results().
 */
static void test_process_and_emit(void)
{
State *s = state_new();
assert(s != NULL);
/* Two PRC samples for PID 100: first utime+stime=30, last=100.
Delta should be 70. */
char prc1[] = "PRC h 1000 d t 600 100 (cc1) S 10 20\n";
char prc2[] = "PRC h 1600 d t 600 100 (cc1) S 70 30\n";
process_line(prc1, s);
process_line(prc2, s);
/* One PRM sample for PID 100: rss=4096 kB. */
char prm1[] = "PRM h 1000 d t 600 100 (cc1) S 4096 100 4096 0 0\n";
process_line(prm1, s);
/* PRC sample for PID 200 seen only once → delta == last_ticks. */
char prc3[] = "PRC h 1000 d t 600 200 (short) S 5 5\n";
process_line(prc3, s);
/* Header / separator / unknown label should be ignored. */
char header[] = "# comment line\n";
process_line(header, s);
char sep[] = "SEP\n";
process_line(sep, s);
char other[] = "CPU h 1000 d t 600 0 0 0 0 0 0 0 0\n";
process_line(other, s);
/* Malformed: pid <= 0. */
char bad_pid[] = "PRC h 1000 d t 600 0 (x) S 1 1\n";
process_line(bad_pid, s);
/* PRC short (<11 tokens) should not crash. */
char prc_short[] = "PRC h 1000 d t 600 300 (y) S 1\n";
process_line(prc_short, s);
/* PRM short (<12 tokens) should not crash. */
char prm_short[] = "PRM h 1000 d t 600 300 (y) S 4096 1 1 0\n";
process_line(prm_short, s);
/* Emit and sanity-check the output. */
char *buf = NULL;
size_t sz = 0;
FILE *out = open_memstream(&buf, &sz);
assert(out != NULL);
emit_results(s, out);
fclose(out);
/* Window: epochs 1000 and 1600 → 2 distinct samples, median gap 600. */
CHECK(strstr(buf, "W\t1000\t1600\t2\t600\n") != NULL);
/* cc1: CPU delta 70 (PID 100, two samples); peak RSS 4096, sum of
averages 4096, rss_samples 1; pids column is
max(cpu_pids, ram_pids) = max(1, 1) = 1. */
CHECK(strstr(buf, "N\tcc1\t70\t4096\t4096\t1\t1\n") != NULL);
/* short: single-sample pid 200 → delta == 10; no RAM, so peak/sum/rss=0. */
CHECK(strstr(buf, "N\tshort\t10\t0\t0\t0\t1\n") != NULL);
free(buf);
state_free(s);
}
/* Verifies the window ("W") line emitted for two degenerate inputs:
   no records at all, and records from exactly one epoch.
   Allocation and stream-creation failures abort via assert() instead of
   passing NULL on to emit_results() or strstr(). */
static void test_empty_and_single_epoch(void)
{
    State *s = state_new();
    assert(s != NULL);
    /* No input at all → window line with zeroes. */
    char *buf = NULL;
    size_t sz = 0;
    FILE *out = open_memstream(&buf, &sz);
    assert(out != NULL);
    emit_results(s, out);
    fclose(out);
    CHECK(strstr(buf, "W\t0\t0\t0\t0\n") != NULL);
    free(buf);
    state_free(s);

    /* Exactly one epoch → median interval stays 0. */
    s = state_new();
    assert(s != NULL);
    char prc[] = "PRC h 500 d t 600 50 (a) S 1 1\n";
    process_line(prc, s);
    /* Reset the capture variables before reusing them for a new stream. */
    buf = NULL;
    sz = 0;
    out = open_memstream(&buf, &sz);
    assert(out != NULL);
    emit_results(s, out);
    fclose(out);
    CHECK(strstr(buf, "W\t500\t500\t1\t0\n") != NULL);
    free(buf);
    state_free(s);
}
/* Verifies that a counter reset (last sample's ticks lower than the first
   sample's) is reported as a CPU delta of 0 rather than a negative value.
   Allocation and stream-creation failures abort via assert() instead of
   passing NULL on to process_line(), emit_results() or strstr(). */
static void test_delta_clamped_to_zero(void)
{
    /* Counter reset: last < first → delta must clamp to 0. */
    State *s = state_new();
    assert(s != NULL);
    char a[] = "PRC h 100 d t 600 77 (x) S 50 50\n";
    char b[] = "PRC h 700 d t 600 77 (x) S 10 10\n";
    process_line(a, s);
    process_line(b, s);

    /* Capture the report in memory and look for name "x" with delta 0. */
    char *buf = NULL;
    size_t sz = 0;
    FILE *out = open_memstream(&buf, &sz);
    assert(out != NULL);
    emit_results(s, out);
    fclose(out);
    CHECK(strstr(buf, "N\tx\t0\t") != NULL);
    free(buf);
    state_free(s);
}
/* Smoke test: feeds one PRC and one PRM record for each PID in 1..2000.
   The test makes no output assertions; it passes if nothing crashes. */
static void test_hash_collision(void)
{
    /* Knuth hashing rarely collides on small integers, so instead of trying
       to pick two colliding PIDs we sweep a whole range to exercise the
       linear-probe branch.
       NOTE(review): the hash table itself is not visible from this test;
       confirm that 2000 entries are enough to trigger probing. */
    State *s = state_new();
    assert(s != NULL);
    for (int pid = 1; pid <= 2000; pid++)
    {
        /* process_line() gets a writable buffer, rebuilt for each record. */
        char line[128];
        snprintf(line, sizeof(line), "PRC h 1000 d t 600 %d (p) S 1 1\n", pid);
        process_line(line, s);
        snprintf(line, sizeof(line), "PRM h 1000 d t 600 %d (p) S 4096 1 1 0 0\n", pid);
        process_line(line, s);
    }
    state_free(s);
}
static void test_state_free_null(void)
{
/* Freeing NULL must be safe. */
state_free(NULL);
}
/* Test driver: runs every test case in a fixed order, then reports.
   Returns 0 when the shared `failures` counter is not positive, otherwise
   prints the count to stderr and returns 1. */
int main(void)
{
    test_copy_name();
    test_tokenize();
    test_process_and_emit();
    test_empty_and_single_epoch();
    test_delta_clamped_to_zero();
    test_hash_collision();
    test_state_free_null();

    /* Success path first; anything else is a failure report. */
    if (failures <= 0)
    {
        printf("atop_agg tests: OK\n");
        return 0;
    }

    fprintf(stderr, "%d test failures\n", failures);
    return 1;
}

File diff suppressed because one or more lines are too long

View File

@ -1,40 +0,0 @@
Checking 1dvelocitysimulator/main.c ...
1/20 files checked 2% done
Checking fps/main.c ...
2/20 files checked 10% done
Checking imageViewer/main.c ...
3/20 files checked 37% done
Checking lichess_random_engine/main.c ...
4/20 files checked 40% done
Checking lichess_random_engine/micro_max.c ...
5/20 files checked 49% done
Checking lichess_random_engine/movegen.c ...
6/20 files checked 60% done
Checking lichess_random_engine/perft.c ...
7/20 files checked 61% done
Checking lichess_random_engine/search.c ...
8/20 files checked 62% done
Checking misc/generatingWordsEndingWIthalka.c ...
9/20 files checked 63% done
Checking misc/randomJPG/generate_images.c ...
10/20 files checked 68% done
Checking misc/randomJPG/generate_jpg.c ...
11/20 files checked 73% done
Checking misc/split/main.c ...
12/20 files checked 74% done
Checking opening_learner/chess.c ...
13/20 files checked 83% done
Checking opening_learner/engine.c ...
14/20 files checked 86% done
Checking opening_learner/gui.c ...
15/20 files checked 90% done
Checking opening_learner/main.c ...
16/20 files checked 93% done
Checking opening_learner/mistakes.c ...
17/20 files checked 95% done
Checking scrapeWebsite/scrape.c ...
18/20 files checked 98% done
Checking tests/generatingPolishLettersOnWindowsTerminal.c ...
19/20 files checked 98% done
Checking websocketServer/main.c ...
20/20 files checked 100% done

View File

@ -1,627 +0,0 @@
Flawfinder version 2.0.19, (C) 2001-2019 David A. Wheeler.
Number of rules (primarily dangerous function names) in C/C++ ruleset: 222
./1dvelocitysimulator/main.c:16:5: [4] (shell) system:
This causes a new program to execute and is difficult to use safely
(CWE-78). try using a library call that implements the same functionality
if available.
./1dvelocitysimulator/main.c:22:5: [4] (shell) system:
This causes a new program to execute and is difficult to use safely
(CWE-78). try using a library call that implements the same functionality
if available.
./1dvelocitysimulator/main.c:27:5: [4] (shell) system:
This causes a new program to execute and is difficult to use safely
(CWE-78). try using a library call that implements the same functionality
if available.
./lichess_random_engine/movegen.c:35:20: [4] (buffer) strcpy:
Does not check for buffer overflows when copying to destination [MS-banned]
(CWE-120). Consider using snprintf, strcpy_s, or strlcpy (warning: strncpy
easily misused).
./opening_learner/engine.c:21:9: [4] (shell) execlp:
This causes a new program to execute and is difficult to use safely
(CWE-78). try using a library call that implements the same functionality
if available.
./scrapeWebsite/scrape.c:49:8: [4] (race) access:
This usually indicates a security flaw. If an attacker can change anything
along the path between the call to access() and the file's actual use
(e.g., by moving files), the attacker can exploit the race condition
(CWE-362/CWE-367!). Set up the correct permissions (e.g., using setuid())
and try to open the file directly.
./fps/main.c:521:2: [3] (random) srand:
This function is not sufficiently random for security-related functions
such as key and nonce creation (CWE-327). Use a more secure technique for
acquiring random values.
./lichess_random_engine/main.c:112:2: [3] (random) srand:
This function is not sufficiently random for security-related functions
such as key and nonce creation (CWE-327). Use a more secure technique for
acquiring random values.
./lichess_random_engine/micro_max.c:228:52: [3] (random) srand:
This function is not sufficiently random for security-related functions
such as key and nonce creation (CWE-327). Use a more secure technique for
acquiring random values.
./misc/randomJPG/generate_images.c:257:5: [3] (random) srand:
This function is not sufficiently random for security-related functions
such as key and nonce creation (CWE-327). Use a more secure technique for
acquiring random values.
./misc/randomJPG/generate_jpg.c:208:5: [3] (random) srand:
This function is not sufficiently random for security-related functions
such as key and nonce creation (CWE-327). Use a more secure technique for
acquiring random values.
./opening_learner/main.c:49:2: [3] (random) srand:
This function is not sufficiently random for security-related functions
such as key and nonce creation (CWE-327). Use a more secure technique for
acquiring random values.
./fps/main.c:338:3: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:26:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:34:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:87:5: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./imageViewer/main.c:416:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:447:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:475:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:553:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:585:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:614:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:689:12: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:1137:9: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./imageViewer/main.c:1181:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:1188:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:1200:9: [2] (buffer) strcpy:
Does not check for buffer overflows when copying to destination [MS-banned]
(CWE-120). Consider using snprintf, strcpy_s, or strlcpy (warning: strncpy
easily misused). Risk is low because the source is a constant string.
./imageViewer/main.c:1207:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./imageViewer/main.c:1208:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./lichess_random_engine/micro_max.c:15:6: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./lichess_random_engine/micro_max.c:179:6: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./lichess_random_engine/movegen.c:35:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./lichess_random_engine/movegen.c:36:5: [2] (buffer) strcat:
Does not check for buffer overflows when concatenating to destination
[MS-banned] (CWE-120). Consider using strcat_s, strncat, strlcat, or
snprintf (warning: strncat is easily misused). Risk is low because the
source is a constant string.
./lichess_random_engine/perft.c:38:21: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./lichess_random_engine/perft.c:46:17: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./lichess_random_engine/perft.c:53:36: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./misc/randomJPG/generate_images.c:106:21: [2] (misc) fopen:
Check when opening files - can an attacker redirect it (via symlinks),
force the opening of special file type (e.g., device files), move things
around to create a race condition, control its ancestors, or change its
contents? (CWE-362).
./misc/randomJPG/generate_images.c:117:21: [2] (misc) fopen:
Check when opening files - can an attacker redirect it (via symlinks),
force the opening of special file type (e.g., device files), move things
around to create a race condition, control its ancestors, or change its
contents? (CWE-362).
./misc/randomJPG/generate_images.c:121:14: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./misc/randomJPG/generate_images.c:124:14: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./misc/randomJPG/generate_images.c:163:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./misc/randomJPG/generate_images.c:234:33: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_images.c:235:27: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_images.c:236:33: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_images.c:237:30: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_images.c:273:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./misc/randomJPG/generate_jpg.c:106:21: [2] (misc) fopen:
Check when opening files - can an attacker redirect it (via symlinks),
force the opening of special file type (e.g., device files), move things
around to create a race condition, control its ancestors, or change its
contents? (CWE-362).
./misc/randomJPG/generate_jpg.c:124:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./misc/randomJPG/generate_jpg.c:186:33: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_jpg.c:187:27: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_jpg.c:188:33: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_jpg.c:189:30: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./misc/randomJPG/generate_jpg.c:224:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/chess.c:253:33: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/chess.c:270:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/chess.h:11:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/chess.h:48:33: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:36:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:82:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:88:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:90:41: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:92:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:104:31: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./opening_learner/engine.c:105:66: [2] (integer) atoi:
Unless checked, the resulting number can exceed the expected range
(CWE-190). If source untrusted, check both minimum and maximum, even if the
input had no minus sign (large numbers can roll over into negative number;
consider saving to an unsigned value if that is intended).
./opening_learner/engine.c:106:25: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:124:59: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:126:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.c:128:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.h:11:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/engine.h:32:59: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/gui.c:73:29: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/gui.h:24:29: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:29:2: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:36:38: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:77:2: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:79:2: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:83:2: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:95:4: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:99:4: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:103:6: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./opening_learner/main.c:136:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:155:4: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/main.c:164:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.c:32:15: [2] (misc) fopen:
Check when opening files - can an attacker redirect it (via symlinks),
force the opening of special file type (e.g., device files), move things
around to create a race condition, control its ancestors, or change its
contents? (CWE-362).
./opening_learner/mistakes.c:42:15: [2] (misc) fopen:
Check when opening files - can an attacker redirect it (via symlinks),
force the opening of special file type (e.g., device files), move things
around to create a race condition, control its ancestors, or change its
contents? (CWE-362).
./opening_learner/mistakes.c:44:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.c:44:21: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.c:44:41: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.c:44:61: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.c:49:13: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./opening_learner/mistakes.c:53:13: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./opening_learner/mistakes.c:57:13: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./opening_learner/mistakes.h:10:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.h:11:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./opening_learner/mistakes.h:13:5: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./scrapeWebsite/scrape.c:28:5: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./scrapeWebsite/scrape.c:56:20: [2] (misc) fopen:
Check when opening files - can an attacker redirect it (via symlinks),
force the opening of special file type (e.g., device files), move things
around to create a race condition, control its ancestors, or change its
contents? (CWE-362).
./websocketServer/main.c:22:22: [2] (buffer) char:
Statically-sized arrays can be improperly restricted, leading to potential
overflows or other issues (CWE-119!/CWE-120). Perform bounds checking, use
functions that limit length, or ensure that the size is larger than the
maximum possible length.
./websocketServer/main.c:24:13: [2] (buffer) memcpy:
Does not check for buffer overflows when copying to destination (CWE-120).
Make sure destination can always hold the source data.
./fps/main.c:345:22: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./fps/main.c:346:22: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./fps/main.c:347:22: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:233:27: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:404:27: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:453:49: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:455:43: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:476:31: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:477:31: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:493:33: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:494:33: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:592:49: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:594:43: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:615:31: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:616:31: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:632:33: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:633:33: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:1182:18: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./imageViewer/main.c:1191:23: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./lichess_random_engine/micro_max.c:163:18: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./lichess_random_engine/micro_max.c:241:11: [1] (buffer) strncpy:
Easily used incorrectly; doesn't always \0-terminate or check for invalid
pointers [MS-banned] (CWE-120).
./lichess_random_engine/movegen.c:428:18: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./lichess_random_engine/movegen.c:439:25: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./opening_learner/chess.c:261:15: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./opening_learner/engine.c:38:9: [1] (obsolete) usleep:
This C routine is considered obsolete (as opposed to the shell command by
the same name). The interaction of this function with SIGALRM and other
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
./opening_learner/engine.c:39:21: [1] (buffer) read:
Check buffer boundaries if used in a loop including recursive loops
(CWE-120, CWE-20).
./opening_learner/engine.c:49:9: [1] (obsolete) usleep:
This C routine is considered obsolete (as opposed to the shell command by
the same name). The interaction of this function with SIGALRM and other
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
./opening_learner/engine.c:50:21: [1] (buffer) read:
Check buffer boundaries if used in a loop including recursive loops
(CWE-120, CWE-20).
./opening_learner/engine.c:72:18: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
./opening_learner/engine.c:94:9: [1] (obsolete) usleep:
This C routine is considered obsolete (as opposed to the shell command by
the same name). The interaction of this function with SIGALRM and other
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
./opening_learner/engine.c:95:17: [1] (buffer) read:
Check buffer boundaries if used in a loop including recursive loops
(CWE-120, CWE-20).
./opening_learner/engine.c:107:25: [1] (buffer) sscanf:
It's unclear if the %s limit in the format string is small enough
(CWE-120). Check that the limit is sufficiently small, or use a different
input function.
./opening_learner/engine.c:130:9: [1] (obsolete) usleep:
This C routine is considered obsolete (as opposed to the shell command by
the same name). The interaction of this function with SIGALRM and other
timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is
unspecified (CWE-676). Use nanosleep(2) or setitimer(2) instead.
./opening_learner/engine.c:131:17: [1] (buffer) read:
Check buffer boundaries if used in a loop including recursive loops
(CWE-120, CWE-20).
./opening_learner/engine.c:136:52: [1] (buffer) sscanf:
It's unclear if the %s limit in the format string is small enough
(CWE-120). Check that the limit is sufficiently small, or use a different
input function.
./opening_learner/main.c:23:15: [1] (buffer) strncat:
Easily used incorrectly (e.g., incorrectly computing the correct maximum
size to add) [MS-banned] (CWE-120). Consider strcat_s, strlcat, snprintf,
or automatically resizing strings. Risk is low because the source is a
constant character.
./opening_learner/main.c:24:2: [1] (buffer) strncat:
Easily used incorrectly (e.g., incorrectly computing the correct maximum
size to add) [MS-banned] (CWE-120). Consider strcat_s, strlcat, snprintf,
or automatically resizing strings.
./opening_learner/main.c:36:73: [1] (buffer) strncpy:
Easily used incorrectly; doesn't always \0-terminate or check for invalid
pointers [MS-banned] (CWE-120).
./opening_learner/main.c:100:30: [1] (buffer) strncpy:
Easily used incorrectly; doesn't always \0-terminate or check for invalid
pointers [MS-banned] (CWE-120).
./opening_learner/main.c:140:5: [1] (buffer) strncpy:
Easily used incorrectly; doesn't always \0-terminate or check for invalid
pointers [MS-banned] (CWE-120).
./websocketServer/main.c:23:30: [1] (buffer) strlen:
Does not handle strings that are not \0-terminated; if given one it may
perform an over-read (it could cause a crash if unprotected) (CWE-126).
ANALYSIS SUMMARY:
Hits = 140
Lines analyzed = 5027 in approximately 0.26 seconds (19578 lines/second)
Physical Source Lines of Code (SLOC) = 4111
Hits@level = [0] 208 [1] 41 [2] 87 [3] 6 [4] 6 [5] 0
Hits@level+ = [0+] 348 [1+] 140 [2+] 99 [3+] 12 [4+] 6 [5+] 0
Hits/KSLOC@level+ = [0+] 84.6509 [1+] 34.055 [2+] 24.0817 [3+] 2.919 [4+] 1.4595 [5+] 0
Dot directories skipped = 1 (--followdotdir overrides)
Minimum risk level = 1
Not every hit is necessarily a security vulnerability.
You can inhibit a report by adding a comment in this form:
// flawfinder: ignore
Make *sure* it's a false positive!
You can use the option --neverignore to show these.
There may be other security vulnerabilities; review your code!
See 'Secure Programming HOWTO'
(https://dwheeler.com/secure-programs) for more information.

View File

@ -1,347 +0,0 @@
#!/usr/bin/env bash
# Lint all C code in C/ and its subfolders with aggressive rules
# - Installs required tools if missing (clang-tidy, clang-format, cppcheck, flawfinder)
# - Uses compile_commands.json if present for clang-tidy; otherwise uses sane defaults
# - Checks formatting with clang-format --dry-run --Werror
# - Runs cppcheck with exhaustive rules
# - Runs flawfinder for security issues
set -u
# ANSI color escape sequences. They are stored with a literal backslash and
# expanded by `echo -e` at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Shared printer behind info/ok/warn/err.
#   $1 - color escape for the marker
#   $2 - marker text placed between the color and the reset sequence
#   remaining arguments - message words, joined with single spaces
_say() {
    local color="$1"
    local marker="$2"
    shift 2
    echo -e "${color}${marker}${NC} $*"
}

# Informational step message, prefixed with a blue "==>".
info() {
    _say "${BLUE}" "==>" "$@"
}

# Success message (green marker).
ok() {
    _say "${GREEN}" "" "$@"
}

# Warning message (yellow marker).
warn() {
    _say "${YELLOW}" "" "$@"
}

# Error message (red marker).
err() {
    _say "${RED}" "" "$@"
}
# Repository root: the parent of the directory that contains this script.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
# Every lint target lives under the repository's C/ folder.
C_DIR="${ROOT_DIR}/C"
# Auto-fix defaults to on; run with LINT_AUTOFIX=0 for a check-only pass.
AUTOFIX=${LINT_AUTOFIX:-1}
# Nothing to lint without the C directory, so stop before any tool is touched.
if [[ ! -d "${C_DIR}" ]]; then
err "C directory not found at ${C_DIR}"
exit 1
fi
# Running count of problems added by the run_* steps; summary_exit exits 1 when it is non-zero.
ISSUES=0
# Tool names that need_cmd could not find on PATH.
MISSING=()
# All .c/.h/.inc files under C_DIR (filled by collect_files).
C_FILES=()
# Only the .c files under C_DIR (filled by collect_files).
C_SOURCES=()
# Append a command name to MISSING when it cannot be found on PATH.
#   $1 - name of the executable to look up
need_cmd() {
    local tool="$1"
    if ! command -v "$tool" >/dev/null 2>&1; then
        MISSING+=("$tool")
    fi
}
# Print the name of the first supported package manager found on PATH, or
# "none" when no supported one is installed.
# Probe order: pacman, apt-get, apt, dnf, zypper, apk.
# The printed name is always the executable that was actually found, so the
# caller can invoke it directly. Previously a host with only apt-get was
# reported as "apt", which made the caller run a binary that may not exist.
# The caller's `apt|apt-get` case arm accepts both spellings.
detect_pkg_manager() {
    local candidate
    for candidate in pacman apt-get apt dnf zypper apk; do
        if command -v "$candidate" >/dev/null 2>&1; then
            echo "$candidate"
            return
        fi
    done
    echo none
}
# Make sure the linters used by this script are available.
# Each tool is looked up on PATH; when some are missing, one install attempt is
# made through the detected package manager (via sudo) and the lookup is
# repeated. Install failures are tolerated (`|| true`), and a warning is
# printed if tools are still missing afterwards.
install_tools() {
    local required=(clang-tidy clang-format cppcheck flawfinder)
    local tool
    local pm

    info "Checking required tools..."
    for tool in "${required[@]}"; do
        need_cmd "$tool"
    done
    if [[ ${#MISSING[@]} -eq 0 ]]; then
        ok "All tools present: clang-tidy, clang-format, cppcheck, flawfinder"
        return 0
    fi

    warn "Missing tools: ${MISSING[*]} — attempting to install with sudo"
    pm=$(detect_pkg_manager)
    case "$pm" in
        pacman)
            sudo pacman -S --needed --noconfirm clang clang-tools-extra clang-format cppcheck flawfinder || true
            ;;
        apt|apt-get)
            sudo "$pm" update -y || true
            # clang-tidy and clang-format may be versioned; prefer unversioned meta pkgs
            sudo "$pm" install -y clang-tidy clang-format cppcheck flawfinder || true
            ;;
        dnf)
            sudo dnf install -y clang-tools-extra clang cppcheck flawfinder || true
            ;;
        zypper)
            sudo zypper --non-interactive install clang-tools clang-tools-extra cppcheck flawfinder || true
            ;;
        apk)
            sudo apk add clang-extra-tools clang cppcheck flawfinder || true
            ;;
        *)
            warn "Unsupported package manager. Please install: clang-tidy clang-format cppcheck flawfinder"
            ;;
    esac

    # Look everything up again from a clean slate to see what the install achieved.
    MISSING=()
    for tool in "${required[@]}"; do
        need_cmd "$tool"
    done
    if [[ ${#MISSING[@]} -eq 0 ]]; then
        ok "Tools installed"
    else
        warn "Still missing: ${MISSING[*]} — continuing, but related steps may be skipped"
    fi
}
# Write default lint configuration files into C_DIR when they are absent.
# An existing .clang-tidy or .clang-format is never overwritten.
ensure_configs() {
    # Provide default aggressive configs if missing
    if [[ ! -f "${C_DIR}/.clang-tidy" ]]; then
        warn ".clang-tidy not found in C/. Creating a default aggressive config."
        # The check globs are indented so they stay inside the folded `Checks`
        # scalar. AnalyzeTemporaryDtors is deliberately not written: the option
        # was removed from clang-tidy and newer releases report it as an
        # unknown key instead of loading the file.
        cat >"${C_DIR}/.clang-tidy" <<'YAML'
Checks: >
  clang-analyzer-*,bugprone-*,cert-*,concurrency-*,hicpp-*,misc-*,performance-*,
  portability-*,readability-*,clang-diagnostic-*,cppcoreguidelines-*
WarningsAsErrors: '*'
HeaderFilterRegex: '.*'
FormatStyle: none
YAML
    fi
    if [[ ! -f "${C_DIR}/.clang-format" ]]; then
        warn ".clang-format not found in C/. Creating a default style."
        # No `Standard` key: clang-format only accepts C++ standards there
        # (c++03 ... c++20, Latest, Auto), so `Standard: C23` made the whole
        # file fail to parse.
        cat >"${C_DIR}/.clang-format" <<'YAML'
BasedOnStyle: LLVM
IndentWidth: 4
TabWidth: 4
UseTab: Never
ColumnLimit: 100
SortIncludes: true
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: true
AllowShortIfStatementsOnASingleLine: false
BreakBeforeBraces: Allman
YAML
    fi
}
# Fill C_FILES (all .c/.h/.inc files) and C_SOURCES (.c files only) from C_DIR,
# skipping hidden paths and the build/dist/out/bin/obj output directories.
# Both lists are read with mapfile, one path per line, so a path containing
# spaces or glob characters stays a single array element. The previous
# `C_FILES=($(find ...))` form split such paths into several bogus entries.
collect_files() {
    # Filters shared by both searches.
    local skip=(
        -not -path '*/.*' -not -path '*/build/*' -not -path '*/dist/*'
        -not -path '*/out/*' -not -path '*/bin/*' -not -path '*/obj/*'
    )

    mapfile -t C_FILES < <(find "${C_DIR}" -type f \
        \( -name '*.c' -o -name '*.h' -o -name '*.inc' \) "${skip[@]}")
    if [[ ${#C_FILES[@]} -eq 0 ]]; then
        warn "No C files found under ${C_DIR}"
    else
        ok "Found ${#C_FILES[@]} C-related files to check"
    fi

    mapfile -t C_SOURCES < <(find "${C_DIR}" -type f -name '*.c' "${skip[@]}")
}
# Reformat every file in C_FILES in place with clang-format.
# Does nothing when clang-format is not installed or C_FILES is empty. A file
# that clang-format fails on is silently left out of the reported count.
apply_clang_format_fix() {
    if ! command -v clang-format >/dev/null 2>&1; then
        warn "clang-format unavailable; skipping auto-format"
        return
    fi
    if (( ${#C_FILES[@]} == 0 )); then
        return
    fi

    info "Applying clang-format -i to source files"
    local formatted=0
    local path
    for path in "${C_FILES[@]}"; do
        clang-format -i "$path" 2>/dev/null && formatted=$((formatted + 1))
    done
    ok "clang-format applied to ${formatted} file(s)"
}
# Run `clang-tidy --fix` on every file in C_SOURCES.
# When C_DIR/compile_commands.json exists and its first line contains "[",
# clang-tidy is pointed at it with -p; otherwise fallback compiler flags
# (-std=c2x plus the file's own directory and C_DIR as include paths) are used.
# Tool output is discarded; only the number of failing invocations is reported.
apply_clang_tidy_fix() {
    if ! command -v clang-tidy >/dev/null 2>&1; then
        warn "clang-tidy unavailable; skipping auto-fix"
        return
    fi
    if (( ${#C_SOURCES[@]} == 0 )); then
        return
    fi

    local db="${C_DIR}/compile_commands.json"
    local used_db="no"
    [[ -f "$db" ]] && head -n 1 "$db" | grep -q '\[' && used_db="yes"

    info "Applying clang-tidy --fix to C sources"
    local failures=0
    local src rel
    local -a tidy_cmd
    for src in "${C_SOURCES[@]}"; do
        # Show the path relative to the repo root; fall back to the raw path.
        rel=$(realpath --relative-to="${ROOT_DIR}" "$src" 2>/dev/null || echo "$src")
        printf ' • %s\n' "$rel"
        if [[ "$used_db" == "yes" ]]; then
            tidy_cmd=(clang-tidy "$src" -p "${C_DIR}" --fix --format-style=file --quiet)
        else
            tidy_cmd=(clang-tidy "$src" --fix --format-style=file --quiet -- -std=c2x -I"$(dirname "$src")" -I"${C_DIR}")
        fi
        "${tidy_cmd[@]}" >/dev/null 2>&1 || failures=$((failures + 1))
    done

    if (( failures > 0 )); then
        warn "clang-tidy auto-fix encountered $failures issue(s); manual review may be required"
    else
        ok "clang-tidy auto-fix pass completed"
    fi
}
# Run the automatic fixers unless AUTOFIX is "0".
# After fixing, the file lists are collected again.
apply_autofix() {
    if [[ "$AUTOFIX" != "0" ]]; then
        info "Automatic fixes enabled (LINT_AUTOFIX=${AUTOFIX})"
        apply_clang_format_fix
        apply_clang_tidy_fix
        # Refresh file lists in case new files were introduced by fixes
        collect_files
    else
        info "Automatic fixes disabled (LINT_AUTOFIX=0)"
    fi
}
# Check (without modifying) that every file in C_FILES is already formatted.
# Each file that clang-format rejects is printed and adds one to ISSUES.
run_clang_format() {
    if ! command -v clang-format >/dev/null 2>&1; then
        warn "clang-format unavailable; skipping format check"
        return
    fi

    info "Checking formatting with clang-format (--dry-run --Werror)"
    local bad=0
    local path
    for path in "${C_FILES[@]}"; do
        if clang-format --dry-run --Werror "$path" >/dev/null 2>&1; then
            continue
        fi
        echo "format issue: $path"
        bad=$((bad + 1))
    done

    if (( bad > 0 )); then
        warn "clang-format found $bad files needing formatting"
        ISSUES=$((ISSUES + bad))
    else
        ok "Formatting OK"
    fi
}
# Run cppcheck over the whole C directory with every check class enabled.
# cppcheck's stderr is captured in a temporary report that is printed,
# indented, after the pass/fail line. A non-zero exit status from cppcheck
# adds exactly one to ISSUES, however many findings the report contains.
run_cppcheck() {
    if ! command -v cppcheck >/dev/null 2>&1; then
        warn "cppcheck unavailable; skipping"
        return
    fi
    info "Running cppcheck (aggressive, recursive)"
    # Use a temp report file to avoid noisy exit codes stopping script
    local report
    report=$(mktemp)
    local opts=(--enable=all --inconclusive --std=c23 --check-level=exhaustive --force \
        --quiet --error-exitcode=2 --inline-suppr --suppress=missingIncludeSystem \
        --library=posix)
    # Skip common non-source dirs. cppcheck's ignore option is -i<path>; it has
    # no --exclude flag, so passing one made cppcheck stop while parsing its
    # arguments and the step always reported a failure.
    local skipped
    for skipped in build dist out .git bin obj; do
        opts+=("-i${skipped}")
    done
    if ! cppcheck "${opts[@]}" "${C_DIR}" 2>"$report"; then
        warn "cppcheck reported issues (see summary below)"
        ISSUES=$((ISSUES+1))
    else
        ok "cppcheck passed"
    fi
    if [[ -s "$report" ]]; then
        echo
        echo "cppcheck output:" && sed -e 's/^/ /' "$report"
    fi
    rm -f "$report"
}
# Run clang-tidy (check only, no --fix) on every file in C_SOURCES.
# Each failing invocation adds one to ISSUES. Tool output is shown as-is.
run_clang_tidy() {
    if ! command -v clang-tidy >/dev/null 2>&1; then
        warn "clang-tidy unavailable; skipping"
        return
    fi

    info "Running clang-tidy on .c files"
    local db="${C_DIR}/compile_commands.json"
    local used_db="no"
    if (( ${#C_SOURCES[@]} == 0 )); then
        warn "No .c files for clang-tidy"
        return
    fi

    # Basic validation only: the compilation database is used when its first
    # line contains "[" (the start of a JSON array); nothing else is checked.
    if [[ ! -f "$db" ]]; then
        :
    elif head -n 1 "$db" | grep -q '\['; then
        used_db="yes"
    else
        warn "compile_commands.json seems malformed; ignoring"
    fi

    local failures=0
    local src
    local -a tidy_cmd
    for src in "${C_SOURCES[@]}"; do
        if [[ "$used_db" == "yes" ]]; then
            tidy_cmd=(clang-tidy "$src" -p "${C_DIR}" --quiet)
        else
            # Fallback args: try C23 and include local dir
            tidy_cmd=(clang-tidy "$src" --quiet -- -std=c2x -I"$(dirname "$src")" -I"${C_DIR}")
        fi
        "${tidy_cmd[@]}" || failures=$((failures + 1))
    done

    if (( failures > 0 )); then
        warn "clang-tidy found issues in $failures file(s)"
        ISSUES=$((ISSUES + failures))
    else
        ok "clang-tidy passed"
    fi
}
# Run flawfinder over C_DIR and print the first 200 lines of its report.
# A non-zero exit status from flawfinder adds one to ISSUES.
# NOTE(review): no --error-level is passed, so flawfinder presumably exits 0
# even when it reports hits and the warning branch only fires when the tool
# itself fails — confirm whether that is intended.
run_flawfinder() {
    if ! command -v flawfinder >/dev/null 2>&1; then
        warn "flawfinder unavailable; skipping"
        return
    fi

    info "Running flawfinder (security-focused scan)"
    local report
    report=$(mktemp)
    if flawfinder --quiet --columns --minlevel=1 --falsepositive "${C_DIR}" >"$report" 2>/dev/null; then
        ok "flawfinder completed"
    else
        warn "flawfinder reported issues"
        ISSUES=$((ISSUES + 1))
    fi

    # Only print the findings section when the report is non-empty.
    if [[ -s "$report" ]]; then
        echo
        echo "flawfinder notable findings:" && head -n 200 "$report" | sed -e 's/^/ /'
    fi
    rm -f "$report"
}
# Print the final verdict. Exits the script with status 1 when ISSUES is
# greater than zero; otherwise prints the success line and returns.
summary_exit() {
    echo
    if [[ $ISSUES -le 0 ]]; then
        ok "All checks passed with no issues"
        return
    fi
    err "Lint completed with $ISSUES issue(s) detected"
    echo "Tip: run 'clang-format -i' to fix formatting; many clang-tidy checks support '--fix'"
    exit 1
}
# Entry point: print the banner, then run every stage in a fixed order.
# No stage's return status is checked here; summary_exit, the last stage,
# decides the script's exit status from ISSUES.
main() {
    local stage
    local stages=(
        install_tools
        ensure_configs
        collect_files
        apply_autofix
        run_clang_format
        run_cppcheck
        run_clang_tidy
        run_flawfinder
        summary_exit
    )

    echo -e "${BLUE}C folder aggressive lint suite${NC}"
    echo
    for stage in "${stages[@]}"; do
        "$stage"
    done
}
main "$@"

View File

@ -1,6 +1,6 @@
# testsAndMisc
A collection of personal projects, scripts, and experiments — from a GPS-based phone focus tool to C/C++ demos, with CI, linting, and pre-commit hooks across the board.
A collection of personal projects, scripts, and experiments — from a GPS-based phone focus tool to Linux/Arch automation, with CI, linting, and pre-commit hooks across the board.
## Highlights
@ -14,21 +14,26 @@ Automated Arch Linux setup: fresh-install scripts, i3 window manager config, LaT
### [Scripts](scripts/)
Utility scripts for development workflows — C/C++ build file validation, secret detection, and custom makepkg helpers.
Utility scripts for development workflows — build file validation, secret detection, and custom makepkg helpers.
## Other Projects
## Repository Layout
| Directory | Description |
| ------------- | ------------------------ |
| `Bash/` | FFmpeg build scripts |
| `C/` | Small native helpers |
| `python_pkg/` | Python package structure |
| Path | Description |
| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `python_pkg/` | Python packages (each maintained subpackage lives here) |
| `linux_configuration/` | Arch Linux setup, i3 config, system maintenance scripts |
| `phone_focus_mode/` | GPS-based Android focus enforcer |
| `scripts/` | Workspace-level helper scripts and pre-commit hooks |
| `docs/` | Reference docs and historical reports |
| `third_party/` | Vendored upstream skills/agents |
| `meta/` | Repo-wide tooling: `pyproject.toml`, `requirements.txt`, `.pre-commit-config.yaml`, `run.sh`, `lint_python.sh`, `.fvmrc`. Symlinked into the repo root so tools that auto-discover from root keep working. |
Archived / unmaintained projects live in the sibling repository
[`testsAndMisc-archive`](https://github.com/kuhyx/testsAndMisc-archive).
## Tooling
- **Python linting**: [Ruff](https://docs.astral.sh/ruff/) with all rules enabled (see `pyproject.toml`)
- **Python linting**: [Ruff](https://docs.astral.sh/ruff/) with all rules enabled (see `meta/pyproject.toml`)
- **Dependencies**: `pip install -r meta/requirements.txt` (combined runtime + dev)
- **CI**: GitHub Actions — lint, build, and test on push
- **Testing**: pytest (Python), custom shell-based test harness for scripts

View File

@ -0,0 +1,50 @@
{
"intent": "Consolidate repo-wide tooling configs into a meta/ folder, drop unused C/ and a few stale python_pkg subpackages, combine requirements files, and remove setup.sh + .binary-allowlist. Root-level symlinks keep tool auto-discovery (pyproject.toml, .pre-commit-config.yaml, requirements.txt, run.sh, lint_python.sh, .fvmrc) working unchanged.",
"scope": [
"Delete: setup.sh, .binary-allowlist, C/, python_pkg/{split,pdfCentered,geo_data}, scripts/check_c_cpp_build_files.sh",
"Move into meta/: run.sh, lint_python.sh, pyproject.toml, .pre-commit-config.yaml, .fvmrc",
"Combine requirements.txt + requirements-dev.txt into meta/requirements.txt",
"Add root symlinks for each moved file so existing tooling keeps resolving from repo root",
"Update README.md and meta/.pre-commit-config.yaml to drop archived path references",
"Add .secret-patterns to .gitignore and untrack it (its content is sensitive home-coordinate regex)"
],
"changes": [
"git rm of removed paths and stale combined requirements files",
"git mv of five root configs into meta/ preserving git history",
"Created meta/requirements.txt as the single source of truth (alphabetically sorted, pre-commit-friendly)",
"Created six root symlinks pointing into meta/",
"Removed clang-format/cppcheck/flawfinder/check-c-cpp-build-files hooks (no C/C++ code remains)",
"Edited check-json, check_polling, codespell, gitleaks-style excludes to drop C/ and CPP/ references",
"Rewrote README.md repository layout section to mention meta/ and drop C/ + Bash/ rows"
],
"verification": [
{
"command": "ls -la run.sh pyproject.toml .pre-commit-config.yaml requirements.txt lint_python.sh .fvmrc",
"result": "pass",
"evidence": "All resolve as symlinks into meta/ with correct targets"
},
{
"command": "python -c 'import tomllib; tomllib.load(open(\"pyproject.toml\",\"rb\"))'",
"result": "pass",
"evidence": "tomllib parses pyproject.toml through the root symlink"
},
{
"command": "pre-commit validate-config",
"result": "pass",
"evidence": "Config validates after path edits"
},
{
"command": "pre-commit run --hook-stage pre-commit",
"result": "pending",
"evidence": "Final run to be executed after adding this artifact"
}
],
"risks": [
"Anything that hard-coded the legacy path requirements-dev.txt or C/ breaks. No internal callers found via git grep.",
"If a tool resolves symlinks (rare) it might log the meta/ path instead of root; harmless."
],
"rollback": [
"git revert the consolidation commit; symlinks and moved files restore atomically because git tracks them.",
"Run pre-commit run --all-files after revert to confirm legacy layout still validates."
]
}

View File

@ -1,346 +0,0 @@
#!/usr/bin/env bash
# ==============================================================================
# Python Linting Script - Run ALL linters with aggressive settings
# ==============================================================================
# Usage:
# ./lint_python.sh # Lint all Python files
# ./lint_python.sh --fix # Lint and auto-fix where possible
# ./lint_python.sh <file.py> # Lint specific file
# ./lint_python.sh --quick # Quick lint (ruff + mypy only)
# ./lint_python.sh --report # Generate detailed reports
# ==============================================================================
# Abort on any failing command, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail
# Colors for output (literal escape text; expanded by `echo -e` when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
BOLD='\033[1m'
# Configuration
# The directory containing this script is treated as the project root; the
# script later changes into it before running any tool.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="${SCRIPT_DIR}"
# Default lint targets, used only when no path is given on the command line.
PYTHON_PATHS=(
"PYTHON"
"articles"
"poker-modifier-app"
"tests"
)
# Directory names to prune from find-based searches.
EXCLUDE_PATHS=(
".venv"
"__pycache__"
".git"
"Bash/ffmpeg-build"
".pytest_cache"
".ruff_cache"
".mypy_cache"
)
# Build exclude pattern for find
# NOTE(review): nothing later in this script reads EXCLUDE_PATTERN, so the
# excludes above currently have no effect on any tool invocation.
EXCLUDE_PATTERN=""
for path in "${EXCLUDE_PATHS[@]}"; do
EXCLUDE_PATTERN="${EXCLUDE_PATTERN} -path '*/${path}/*' -prune -o"
done
# Parse arguments
# Mode flags default to off. TARGET_FILES is a single space-separated string
# that is expanded unquoted further down, so targets containing spaces are not
# supported.
FIX_MODE=false
QUICK_MODE=false
REPORT_MODE=false
TARGET_FILES=""
while [[ $# -gt 0 ]]; do
case $1 in
--fix|-f)
FIX_MODE=true
shift
;;
--quick|-q)
QUICK_MODE=true
shift
;;
--report|-r)
REPORT_MODE=true
shift
;;
--help|-h)
echo "Usage: $0 [OPTIONS] [FILES...]"
echo ""
echo "Options:"
echo " --fix, -f Auto-fix issues where possible"
echo " --quick, -q Quick mode (ruff + mypy only)"
echo " --report, -r Generate detailed reports to ./lint-reports/"
echo " --help, -h Show this help message"
echo ""
echo "Examples:"
echo " $0 # Lint all Python files"
echo " $0 --fix # Lint and auto-fix"
echo " $0 PYTHON/ # Lint specific directory"
echo " $0 --quick --fix # Quick lint with auto-fix"
exit 0
;;
*)
# Anything that is not a known flag (including an unknown option) is
# appended to the target list.
TARGET_FILES="${TARGET_FILES} $1"
shift
;;
esac
done
# If no target specified, use default paths
if [[ -z "${TARGET_FILES}" ]]; then
TARGET_FILES="${PYTHON_PATHS[*]}"
fi
# Create reports directory if needed
if [[ "${REPORT_MODE}" == true ]]; then
mkdir -p "${PROJECT_ROOT}/lint-reports"
fi
# Track overall status
# OVERALL_STATUS becomes 1 as soon as any run_tool call fails; FAILED_TOOLS
# collects the names of the tools that failed, for the summary.
OVERALL_STATUS=0
FAILED_TOOLS=()
# ==============================================================================
# Helper functions
# ==============================================================================
# Print a bold blue banner: an empty line, a rule, the title, and a rule.
#   $1 - banner title
print_header() {
    local rule="══════════════════════════════════════════════════════════════"
    echo ""
    echo -e "${BOLD}${BLUE}${rule}${NC}"
    echo -e "${BOLD}${BLUE} $1${NC}"
    echo -e "${BOLD}${BLUE}${rule}${NC}"
}

# Print a cyan sub-banner: an empty line, a thin rule, the title, and a thin rule.
#   $1 - sub-banner title
print_subheader() {
    local rule="──────────────────────────────────────────────────────────────"
    echo ""
    echo -e "${CYAN}${rule}${NC}"
    echo -e "${CYAN} $1${NC}"
    echo -e "${CYAN}${rule}${NC}"
}

# Print a success line (green marker followed by the message in $1).
print_success() {
    local message="$1"
    echo -e "${GREEN}${NC} ${message}"
}

# Print a warning line (yellow marker followed by the message in $1).
print_warning() {
    local message="$1"
    echo -e "${YELLOW}${NC} ${message}"
}

# Print an error line (red marker followed by the message in $1).
print_error() {
    local message="$1"
    echo -e "${RED}${NC} ${message}"
}

# Print an informational line (blue marker followed by the message in $1).
print_info() {
    local message="$1"
    echo -e "${BLUE}${NC} ${message}"
}
# Run one linter command and report whether it passed.
#   $1 - label used in messages, in FAILED_TOOLS and in the report file name
#   $2 - shell command line, executed with eval
# In report mode the command's combined stdout/stderr is also copied to
# lint-reports/<label>.txt under PROJECT_ROOT.
# Returns 0 when the command succeeds; otherwise appends the label to
# FAILED_TOOLS and returns 1.
run_tool() {
    local tool_name="$1"
    local tool_cmd="$2"
    local report_file="${PROJECT_ROOT}/lint-reports/${tool_name}.txt"
    local failure_msg="${tool_name} found issues"
    local rc=0

    print_subheader "Running ${tool_name}..."
    if [[ "${REPORT_MODE}" == true ]]; then
        # The pipeline status is what counts here; the script runs with
        # pipefail, so a failing linter is not masked by tee succeeding.
        eval "${tool_cmd}" 2>&1 | tee "${report_file}" || rc=$?
        failure_msg+=" (see ${report_file})"
    else
        eval "${tool_cmd}" || rc=$?
    fi

    if [[ ${rc} -eq 0 ]]; then
        print_success "${tool_name} passed"
        return 0
    fi
    print_error "${failure_msg}"
    FAILED_TOOLS+=("${tool_name}")
    return 1
}
# Report whether a tool is installed.
#   $1 - executable name
# Returns 0 when the tool is on PATH; otherwise prints a "skipping" warning
# and returns 1.
check_tool() {
    local tool="$1"
    command -v "${tool}" &> /dev/null && return 0
    print_warning "${tool} not found, skipping..."
    return 1
}
# ==============================================================================
# Main linting process
# ==============================================================================
# Announce the run and echo the effective settings.
print_header "Python Linting Suite - Aggressive Mode"
echo ""
print_info "Target: ${TARGET_FILES}"
print_info "Fix mode: ${FIX_MODE}"
print_info "Quick mode: ${QUICK_MODE}"
print_info "Report mode: ${REPORT_MODE}"
# From here on, relative target paths and config file names (pyproject.toml)
# are resolved against PROJECT_ROOT.
cd "${PROJECT_ROOT}"
# Every tool below follows the same pattern: it is skipped with a warning when
# not installed (check_tool), and a failure only flips OVERALL_STATUS to 1
# instead of aborting, so the remaining tools still run.
# ==============================================================================
# RUFF - Primary linter and formatter
# ==============================================================================
if check_tool ruff; then
if [[ "${FIX_MODE}" == true ]]; then
run_tool "ruff-lint" "ruff check --fix --show-fixes ${TARGET_FILES}" || OVERALL_STATUS=1
run_tool "ruff-format" "ruff format ${TARGET_FILES}" || OVERALL_STATUS=1
else
run_tool "ruff-lint" "ruff check ${TARGET_FILES}" || OVERALL_STATUS=1
run_tool "ruff-format-check" "ruff format --check ${TARGET_FILES}" || OVERALL_STATUS=1
fi
fi
# ==============================================================================
# MYPY - Static type checking
# ==============================================================================
if check_tool mypy; then
run_tool "mypy" "mypy --strict --ignore-missing-imports ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# Quick mode exits here
# Only ruff and mypy have run at this point; the exit status reflects whether
# either of them was recorded in FAILED_TOOLS.
if [[ "${QUICK_MODE}" == true ]]; then
print_header "Quick Lint Complete"
if [[ ${#FAILED_TOOLS[@]} -gt 0 ]]; then
print_error "Failed tools: ${FAILED_TOOLS[*]}"
exit 1
else
print_success "All quick checks passed!"
exit 0
fi
fi
# ==============================================================================
# PYLINT - Comprehensive linting
# ==============================================================================
if check_tool pylint; then
run_tool "pylint" "pylint --rcfile=pyproject.toml --jobs=0 --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# BANDIT - Security linting
# ==============================================================================
if check_tool bandit; then
run_tool "bandit" "bandit -c pyproject.toml -r ${TARGET_FILES} --severity-level low --confidence-level low" || OVERALL_STATUS=1
fi
# ==============================================================================
# VULTURE - Dead code detection
# ==============================================================================
if check_tool vulture; then
run_tool "vulture" "vulture --min-confidence 80 ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# FLAKE8 - Traditional linter
# ==============================================================================
if check_tool flake8; then
run_tool "flake8" "flake8 --max-line-length=88 --extend-ignore=E203,W503 --max-complexity=10 ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# PYCODESTYLE - PEP 8 style checker
# ==============================================================================
if check_tool pycodestyle; then
run_tool "pycodestyle" "pycodestyle --max-line-length=88 --ignore=E203,W503 ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# PYDOCSTYLE - Docstring style checker
# ==============================================================================
if check_tool pydocstyle; then
run_tool "pydocstyle" "pydocstyle --convention=google ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# RADON - Complexity metrics
# ==============================================================================
# Radon is informational only: every call ends in `|| true`, so it never
# affects OVERALL_STATUS. In report mode the same two commands are run a
# second time with their output redirected to files.
if check_tool radon; then
print_subheader "Running radon (complexity analysis)..."
echo ""
echo -e "${MAGENTA}Cyclomatic Complexity:${NC}"
radon cc -a -s ${TARGET_FILES} || true
echo ""
echo -e "${MAGENTA}Maintainability Index:${NC}"
radon mi -s ${TARGET_FILES} || true
if [[ "${REPORT_MODE}" == true ]]; then
radon cc -a -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-cc.txt" 2>&1 || true
radon mi -s ${TARGET_FILES} > "${PROJECT_ROOT}/lint-reports/radon-mi.txt" 2>&1 || true
fi
fi
# ==============================================================================
# INTERROGATE - Docstring coverage
# ==============================================================================
if check_tool interrogate; then
run_tool "interrogate" "interrogate -v --fail-under=0 ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# PYRIGHT - Microsoft's type checker (optional, very strict)
# ==============================================================================
if check_tool pyright; then
run_tool "pyright" "pyright ${TARGET_FILES}" || OVERALL_STATUS=1
fi
# ==============================================================================
# AUTOFLAKE - Unused imports/variables (fix mode only)
# ==============================================================================
# NOTE(review): unlike the run_tool calls, this find is not guarded with `||`,
# so under `set -e` a non-zero exit (e.g. a target path that does not exist)
# presumably aborts the script before the summary — confirm.
if [[ "${FIX_MODE}" == true ]] && check_tool autoflake; then
print_subheader "Running autoflake (removing unused imports)..."
find ${TARGET_FILES} -name "*.py" -type f -exec autoflake --in-place --remove-all-unused-imports --remove-unused-variables {} \;
print_success "autoflake completed"
fi
# ==============================================================================
# PYUPGRADE - Upgrade Python syntax (fix mode only)
# ==============================================================================
# NOTE(review): same unguarded find as the autoflake step above — confirm the
# intended behavior when a target path is missing.
if [[ "${FIX_MODE}" == true ]] && check_tool pyupgrade; then
print_subheader "Running pyupgrade (upgrading syntax to Python 3.10+)..."
find ${TARGET_FILES} -name "*.py" -type f -exec pyupgrade --py310-plus {} \;
print_success "pyupgrade completed"
fi
# ==============================================================================
# CODESPELL - Spell checking
# ==============================================================================
# In fix mode codespell is given -w; otherwise it only reports.
if check_tool codespell; then
if [[ "${FIX_MODE}" == true ]]; then
run_tool "codespell" "codespell -w --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1
else
run_tool "codespell" "codespell --skip='*.json,*.lock,.git,__pycache__,.venv' ${TARGET_FILES}" || OVERALL_STATUS=1
fi
fi
# ==============================================================================
# Summary
# ==============================================================================
# The script's exit status is OVERALL_STATUS: 0 when every run_tool call
# passed, 1 otherwise.
print_header "Linting Summary"
echo ""
if [[ ${OVERALL_STATUS} -ne 0 ]]; then
print_error "The following tools reported issues:"
for tool in "${FAILED_TOOLS[@]}"; do
echo " - ${tool}"
done
echo ""
if [[ "${REPORT_MODE}" == true ]]; then
print_info "Detailed reports saved to: ${PROJECT_ROOT}/lint-reports/"
fi
print_info "Run with --fix to auto-fix issues where possible"
exit ${OVERALL_STATUS}
else
print_success "All linting checks passed!"
exit 0
fi

1
lint_python.sh Symbolic link
View File

@ -0,0 +1 @@
meta/lint_python.sh

3
meta/.fvmrc Normal file
View File

@ -0,0 +1,3 @@
{
"flutter": "stable"
}

View File

@ -0,0 +1,391 @@
# ==============================================================================
# Pre-commit Configuration - Multi-language Linting & Formatting
# ==============================================================================
# Install: pre-commit install && pre-commit install --hook-type pre-push
# Fast lint: pre-commit run --all-files (linters only, ~10 s)
# Full suite: pre-commit run --all-files --hook-stage pre-push (+ tests)
# Update hooks: pre-commit autoupdate
# ==============================================================================
# Global settings
default_language_version:
python: python3
# Fail fast on first error (set to false to see all errors)
fail_fast: false
# Configuration
ci:
autofix_commit_msg: "style: auto-fix by pre-commit hooks"
autoupdate_commit_msg: "chore: update pre-commit hooks"
repos:
# ===========================================================================
# GENERAL HOOKS - File formatting and validation
# ===========================================================================
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
- id: check-yaml
args: [--unsafe]
- id: check-json
# Exclude JSONC files (VS Code configs, TypeScript configs)
exclude: ^(\.vscode/|.*/\.vscode/|.*tsconfig.*\.json)
- id: check-toml
- id: check-xml
- id: check-added-large-files
args: [--maxkb=2000]
- id: check-merge-conflict
- id: check-case-conflict
- id: check-symlinks
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: detect-private-key
- id: debug-statements
- id: name-tests-test
args: [--pytest-test-first]
- id: check-ast
- id: check-builtin-literals
- id: check-docstring-first
- id: fix-byte-order-marker
- id: mixed-line-ending
args: [--fix=lf]
- id: requirements-txt-fixer
# ===========================================================================
# BINARY BLOCKER - Prevent binary/image files from being committed
# ===========================================================================
- repo: local
hooks:
- id: no-binaries
name: Block binary/image files
entry: scripts/check_no_binaries.sh
language: script
always_run: false
- id: ai-evidence-contract
name: Require AI evidence artifacts for code changes
entry: scripts/check_ai_evidence.sh
language: script
pass_filenames: false
always_run: true
- id: ai-multifile-contract
name: Require workflow contract for multi-file code changes
entry: scripts/check_agent_contract.sh
language: script
pass_filenames: false
always_run: true
- id: append-only-sessions
name: Enforce append-only session logs
entry: scripts/check_append_only_sessions.sh
language: script
pass_filenames: false
always_run: true
# ===========================================================================
# POLLING SCRIPT LINTER - Detect fork-storm anti-patterns in shell scripts
# ===========================================================================
- repo: local
hooks:
- id: no-polling-antipatterns
name: Block polling script anti-patterns
entry: scripts/check_polling_antipatterns.sh
language: script
types: [shell]
exclude: ^(\.git/|phone_focus_mode/lib/tests/|tests/)
# ===========================================================================
# NOQA BLOCKER - Zero tolerance for noqa/type:ignore suppression comments
# ===========================================================================
- repo: local
hooks:
- id: no-noqa
name: Block noqa comments
entry: '(?i)#\s*(noqa|type:\s*ignore)'
language: pygrep
types: [python]
- id: no-ruff-noqa
name: Block ruff noqa file-level comments
entry: '(?i)#\s*ruff:\s*noqa'
language: pygrep
types: [python]
# ===========================================================================
# RUFF - Fast Python linter and formatter (replaces black, isort, flake8, etc.)
# ===========================================================================
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.2
hooks:
# Linter - run first to catch issues
- id: ruff
args:
- --fix
- --unsafe-fixes
- --exit-non-zero-on-fix
- --show-fixes
types_or: [python, pyi]
# Formatter - run after linting
- id: ruff-format
types_or: [python, pyi]
# ===========================================================================
# MYPY - Static type checking (runs on push only for speed)
# ===========================================================================
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
hooks:
- id: mypy
stages: [pre-push]
args:
- --ignore-missing-imports
- --no-error-summary
- --disable-error-code=no-untyped-def
- --disable-error-code=no-untyped-call
- --disable-error-code=var-annotated
- --disable-error-code=no-any-unimported
- --disable-error-code=type-arg
- --disable-error-code=no-any-return
- --disable-error-code=misc
- --disable-error-code=unused-ignore
- --disable-error-code=unreachable
- --disable-error-code=assignment
- --disable-error-code=no-redef
- --disable-error-code=attr-defined
- --disable-error-code=arg-type
- --disable-error-code=union-attr
- --disable-error-code=call-overload
- --disable-error-code=return-value
- --disable-error-code=redundant-cast
- --disable-error-code=empty-body
- --disable-error-code=list-item
exclude: >-
(?x)^(
Bash/.*|
\.venv/.*|
linux_configuration/scripts/misc/testsAndMisc-bash/tools/.*
)$
additional_dependencies:
- types-requests
- types-PyYAML
- types-python-dateutil
# ===========================================================================
# PYLINT - Comprehensive Python linter (runs on push only for speed)
# ===========================================================================
- repo: https://github.com/pylint-dev/pylint
rev: v3.3.2
hooks:
- id: pylint
stages: [pre-push]
args:
- --rcfile=pyproject.toml
- --fail-under=8.0
- --jobs=0
additional_dependencies:
- pytest
- python-chess
- requests
- pygame
exclude: ^(Bash/|\.venv/)
# ===========================================================================
# BANDIT - Security linter (runs on push only for speed)
# ===========================================================================
- repo: https://github.com/PyCQA/bandit
rev: 1.7.10
hooks:
- id: bandit
stages: [pre-push]
args:
- -c
- pyproject.toml
- --severity-level=high
- --confidence-level=medium
- --skip=B113
additional_dependencies: ["bandit[toml]"]
exclude: ^(Bash/|\.venv/|tests/|.*test.*\.py$)
# ===========================================================================
# PYTEST + COVERAGE - Run tests and enforce 100% code coverage
# Only tests for subpackages with changed files are run (see script).
# Runs on push only (slow); use --hook-stage pre-push to run manually.
# ===========================================================================
- repo: local
hooks:
- id: pytest-coverage
name: pytest with coverage enforcement
entry: python scripts/pytest_changed_packages.py
language: system
types: [python]
pass_filenames: true
stages: [pre-push]
# ===========================================================================
# VULTURE - Dead code detection (disabled - doesn't work well with pre-commit)
# ===========================================================================
# - repo: https://github.com/jendrikseipp/vulture
# rev: v2.13
# hooks:
# - id: vulture
# args:
# - --min-confidence=80
# - --exclude=.venv,Bash,__pycache__
# exclude: ^(Bash/|\.venv/)
# ===========================================================================
# PYUPGRADE - Upgrade Python syntax (disabled - incompatible with Python 3.14)
# ===========================================================================
# - repo: https://github.com/asottile/pyupgrade
# rev: v3.19.0
# hooks:
# - id: pyupgrade
# args:
# - --py310-plus
# ===========================================================================
# CODESPELL - Spell checking in code (expanded ignore list for non-English)
# ===========================================================================
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
args:
- --skip=*.json,*.lock,*.min.js,*.min.css,.git,__pycache__,.venv,*.txt
- --ignore-words-list=als,ans,ect,nd,som,sur,te,nam,numer,lew,sie,wil,postion,clen,ther,folow,derrive,ony,tje,noe,theses,crate,doubleclick,wile,tabel,pary,blok,bloc,proces,serwer,parametr,adres,hart,dout,metod,tekst,synonim,grup,mosty,lokal,skalar,milion,nowe,tre,hel,alph
exclude: ^(Bash/ffmpeg-build/|LaTeX/|.*\.geojson$)
# ===========================================================================
# DOCFORMATTER - Format docstrings (disabled - causes recursion errors)
# ===========================================================================
# - repo: local
# hooks:
# - id: docformatter
# name: docformatter
# entry: docformatter
# language: system
# types: [python]
# args:
# - --in-place
# - --wrap-summaries=88
# - --wrap-descriptions=88
# ===========================================================================
# INTERROGATE - Docstring coverage (disabled - causes recursion on large files)
# ===========================================================================
# - repo: https://github.com/econchick/interrogate
# rev: 1.7.0
# hooks:
# - id: interrogate
# args:
# - --fail-under=0
# - --verbose
# - --ignore-init-method
# - --ignore-init-module
# - --ignore-magic
# - --ignore-private
# - --ignore-semiprivate
# - --exclude=Bash,.venv,__pycache__
# pass_filenames: false
# ===========================================================================
# AUTOFLAKE - Remove unused imports/variables
# Disabled: fully redundant with ruff (F401, F841, F811) + --fix
# ===========================================================================
# - repo: https://github.com/PyCQA/autoflake
# rev: v2.3.1
# hooks:
# - id: autoflake
# args:
# - --in-place
# - --remove-all-unused-imports
# - --remove-unused-variables
# - --remove-duplicate-keys
# - --expand-star-imports
# ===========================================================================
# SAFETY - Check for security vulnerabilities in dependencies
# ===========================================================================
# Note: Safety requires API key for full functionality, disabled by default
# - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
# rev: v1.3.2
# hooks:
# - id: python-safety-dependencies-check
# files: requirements.*\.txt$
# ===========================================================================
# PYRIGHT - Microsoft's type checker (very strict, optional)
# ===========================================================================
# Uncomment to enable - can be slow and very strict
# - repo: https://github.com/RobertCraigie/pyright-python
# rev: v1.1.350
# hooks:
# - id: pyright
# ===========================================================================
# CHECK JSON/YAML/Markdown formatting (runs on push only — slow Node.js startup)
# ===========================================================================
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
types_or: [yaml, json, markdown]
exclude: ^(Bash/|\.venv/|.*\.lock$)
stages: [pre-push]
# ===========================================================================
# SHELLCHECK - Shell script linting
# Wrapper batches files to avoid OOM on large repos.
# ===========================================================================
- repo: local
hooks:
- id: shellcheck
name: shellcheck
entry: bash -c 'printf "%s\0" "$@" | xargs -0 -n 40 shellcheck --severity=warning' --
language: system
types: [shell]
# ===========================================================================
# CHECK PYTHON LOCATION - All Python files must be under python_pkg/
# ===========================================================================
- repo: local
hooks:
- id: check-python-location
name: check Python files are under python_pkg/
entry: scripts/check_python_location.sh
language: script
types: [python]
# ===========================================================================
# REMOVE EMPTY DIRECTORIES - Clean up empty folders in the repo
# ===========================================================================
- repo: local
hooks:
- id: remove-empty-dirs
name: remove empty directories
entry: find . -type d -empty -not -path './.git/*' -delete -print
language: system
pass_filenames: false
always_run: true
# ===========================================================================
# SECRET PATTERNS - Block commits containing sensitive data
# ===========================================================================
- repo: local
  hooks:
    # Runs scripts/check_no_secrets.sh against the files pre-commit passes in.
    - id: check-no-secrets
      name: check for leaked secrets
      entry: scripts/check_no_secrets.sh
      language: script
      # Files exempt from the scan: the pattern list itself, the pre-commit
      # config (both the root-level symlink and the real file under meta/),
      # and GeoJSON data. The pattern is matched against repo-relative paths,
      # so the optional "meta/" prefix is required to cover the moved config.
      # Single quotes keep the backslashes literal.
      exclude: '^(\.secret-patterns|(meta/)?\.pre-commit-config\.yaml|.*\.geojson)$'
# ===========================================================================
# COMMITIZEN - Conventional commits (optional)
# ===========================================================================
# - repo: https://github.com/commitizen-tools/commitizen
# rev: v3.13.0
# hooks:
# - id: commitizen
# - id: commitizen-branch
# stages: [pre-push]

346
meta/lint_python.sh Executable file
View File

@ -0,0 +1,346 @@
#!/usr/bin/env bash
# ==============================================================================
# Python Linting Script - Run ALL linters with aggressive settings
# ==============================================================================
# Usage:
# ./lint_python.sh # Lint all Python files
# ./lint_python.sh --fix # Lint and auto-fix where possible
# ./lint_python.sh <file.py> # Lint specific file
# ./lint_python.sh --quick # Quick lint (ruff + mypy only)
# ./lint_python.sh --report # Generate detailed reports
# ==============================================================================
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
BOLD='\033[1m'
# Configuration
# Resolve the real location of this script. It lives in meta/ and is also
# reachable through a symlink in the repository root, so symlinks are followed
# first; otherwise PROJECT_ROOT would depend on which path was used to run it.
SCRIPT_SOURCE="${BASH_SOURCE[0]}"
while [[ -L "${SCRIPT_SOURCE}" ]]; do
    SCRIPT_LINK_DIR="$(cd -P "$(dirname "${SCRIPT_SOURCE}")" && pwd)"
    SCRIPT_SOURCE="$(readlink "${SCRIPT_SOURCE}")"
    # A relative link target is relative to the directory that holds the link.
    if [[ "${SCRIPT_SOURCE}" != /* ]]; then
        SCRIPT_SOURCE="${SCRIPT_LINK_DIR}/${SCRIPT_SOURCE}"
    fi
done
SCRIPT_DIR="$(cd -P "$(dirname "${SCRIPT_SOURCE}")" && pwd)"
# The lint targets and pyproject.toml are addressed relative to the repository
# root, which is the parent directory when the script sits in meta/.
if [[ "$(basename "${SCRIPT_DIR}")" == "meta" ]]; then
    PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
else
    PROJECT_ROOT="${SCRIPT_DIR}"
fi
# Default lint targets, used when no file or directory is given on the
# command line.
PYTHON_PATHS=(
    "PYTHON"
    "articles"
    "poker-modifier-app"
    "tests"
)
# Directory names turned into find(1) prune clauses below.
EXCLUDE_PATHS=(
    ".venv"
    "__pycache__"
    ".git"
    "Bash/ffmpeg-build"
    ".pytest_cache"
    ".ruff_cache"
    ".mypy_cache"
)
# Build exclude pattern for find
# NOTE(review): EXCLUDE_PATTERN is assembled here but no command in this
# script references it — confirm whether the find calls were meant to use it.
EXCLUDE_PATTERN=""
for path in "${EXCLUDE_PATHS[@]}"; do
    EXCLUDE_PATTERN="${EXCLUDE_PATTERN} -path '*/${path}/*' -prune -o"
done
# Parse arguments
# Mode flags default to off; anything that is not a recognised flag is
# collected (space-separated) as a lint target.
FIX_MODE=false
QUICK_MODE=false
REPORT_MODE=false
TARGET_FILES=""
while (( $# > 0 )); do
    case "$1" in
        -f|--fix)
            FIX_MODE=true
            ;;
        -q|--quick)
            QUICK_MODE=true
            ;;
        -r|--report)
            REPORT_MODE=true
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS] [FILES...]"
            echo ""
            echo "Options:"
            echo " --fix, -f Auto-fix issues where possible"
            echo " --quick, -q Quick mode (ruff + mypy only)"
            echo " --report, -r Generate detailed reports to ./lint-reports/"
            echo " --help, -h Show this help message"
            echo ""
            echo "Examples:"
            echo " $0 # Lint all Python files"
            echo " $0 --fix # Lint and auto-fix"
            echo " $0 PYTHON/ # Lint specific directory"
            echo " $0 --quick --fix # Quick lint with auto-fix"
            exit 0
            ;;
        *)
            TARGET_FILES+=" $1"
            ;;
    esac
    # Every branch that reaches this point consumed exactly one argument.
    shift
done
# No explicit target: fall back to the default paths, joined with spaces.
: "${TARGET_FILES:=${PYTHON_PATHS[*]}}"
# Reports are written below the project root; create the directory up front.
if [[ "${REPORT_MODE}" == true ]]; then
    mkdir -p "${PROJECT_ROOT}/lint-reports"
fi
# Overall exit status plus the names of the tools that failed, for the summary.
OVERALL_STATUS=0
FAILED_TOOLS=()
# ==============================================================================
# Helper functions
# ==============================================================================
# Print "$1" as a bold blue banner framed by two double-line rules,
# preceded by an empty line.
print_header() {
    local rule="══════════════════════════════════════════════════════════════"
    echo ""
    echo -e "${BOLD}${BLUE}${rule}${NC}"
    echo -e "${BOLD}${BLUE} $1${NC}"
    echo -e "${BOLD}${BLUE}${rule}${NC}"
}
# Print "$1" as a cyan sub-heading framed by two single-line rules,
# preceded by an empty line.
print_subheader() {
    local rule="──────────────────────────────────────────────────────────────"
    echo ""
    echo -e "${CYAN}${rule}${NC}"
    echo -e "${CYAN} $1${NC}"
    echo -e "${CYAN}${rule}${NC}"
}
# Status-line helpers. Each one echoes a colour escape immediately followed by
# the reset escape, then a space and the message in "$1".
# NOTE(review): nothing sits between the colour code and the reset, so the
# colour has no visible effect — a status glyph may have been intended there;
# confirm against the file on disk.

# Report a passing step.
print_success() {
echo -e "${GREEN}${NC} $1"
}
# Report a non-fatal condition (for example, a tool that is not installed).
print_warning() {
echo -e "${YELLOW}${NC} $1"
}
# Report a failing step.
print_error() {
echo -e "${RED}${NC} $1"
}
# Report neutral, informational text.
print_info() {
echo -e "${BLUE}${NC} $1"
}
# Run one linter command and record its outcome.
#   $1 - label used in messages, in FAILED_TOOLS and as the report file name
#   $2 - full command line; executed through eval
# Returns 0 when the command succeeds, 1 otherwise. On failure the label is
# appended to FAILED_TOOLS.
run_tool() {
    local label="$1"
    local command_line="$2"
    local report_path="${PROJECT_ROOT}/lint-reports/${label}.txt"
    local failure_note="${label} found issues"
    local rc=0

    print_subheader "Running ${label}..."
    if [[ "${REPORT_MODE}" == true ]]; then
        # Output is shown and also copied to the report file; with pipefail
        # enabled the pipeline fails when the linter itself fails.
        eval "${command_line}" 2>&1 | tee "${report_path}" || rc=$?
        failure_note="${failure_note} (see ${report_path})"
    else
        eval "${command_line}" || rc=$?
    fi

    if (( rc != 0 )); then
        print_error "${failure_note}"
        FAILED_TOOLS+=("${label}")
        return 1
    fi
    print_success "${label} passed"
    return 0
}
# Return 0 when the command named in "$1" can be found by `command -v`;
# otherwise print a warning and return 1 so the caller can skip the tool.
check_tool() {
    if ! command -v "$1" > /dev/null 2>&1; then
        print_warning "$1 not found, skipping..."
        return 1
    fi
    return 0
}
# ==============================================================================
# Main linting process
# ==============================================================================
print_header "Python Linting Suite - Aggressive Mode"
echo ""
print_info "Target: ${TARGET_FILES}"
print_info "Fix mode: ${FIX_MODE}"
print_info "Quick mode: ${QUICK_MODE}"
print_info "Report mode: ${REPORT_MODE}"
# All target paths and pyproject.toml are addressed relative to the root.
cd "${PROJECT_ROOT}"

# ------------------------------------------------------------------------------
# RUFF - lint, then format. Fix mode rewrites files; otherwise check only.
# ------------------------------------------------------------------------------
if check_tool ruff; then
    if [[ "${FIX_MODE}" == true ]]; then
        ruff_lint_cmd="ruff check --fix --show-fixes ${TARGET_FILES}"
        ruff_format_label="ruff-format"
        ruff_format_cmd="ruff format ${TARGET_FILES}"
    else
        ruff_lint_cmd="ruff check ${TARGET_FILES}"
        ruff_format_label="ruff-format-check"
        ruff_format_cmd="ruff format --check ${TARGET_FILES}"
    fi
    run_tool "ruff-lint" "${ruff_lint_cmd}" || OVERALL_STATUS=1
    run_tool "${ruff_format_label}" "${ruff_format_cmd}" || OVERALL_STATUS=1
fi

# ------------------------------------------------------------------------------
# MYPY - strict static type checking
# ------------------------------------------------------------------------------
if check_tool mypy; then
    run_tool "mypy" "mypy --strict --ignore-missing-imports ${TARGET_FILES}" || OVERALL_STATUS=1
fi

# Quick mode stops after ruff and mypy and reports on those two alone.
if [[ "${QUICK_MODE}" == true ]]; then
    print_header "Quick Lint Complete"
    if (( ${#FAILED_TOOLS[@]} == 0 )); then
        print_success "All quick checks passed!"
        exit 0
    fi
    print_error "Failed tools: ${FAILED_TOOLS[*]}"
    exit 1
fi
# ------------------------------------------------------------------------------
# Full-mode linters. Each tool is skipped (with a warning from check_tool) when
# it is not installed; a failing tool marks the overall run as failed.
# ------------------------------------------------------------------------------
# $1 - executable name, also used as the label; $2 - command line to run.
lint_if_available() {
    if check_tool "$1"; then
        run_tool "$1" "$2" || OVERALL_STATUS=1
    fi
}

# PYLINT - comprehensive linting (score threshold disabled via --fail-under=0)
lint_if_available pylint "pylint --rcfile=pyproject.toml --jobs=0 --fail-under=0 ${TARGET_FILES}"

# BANDIT - security linting at the lowest severity/confidence thresholds
lint_if_available bandit "bandit -c pyproject.toml -r ${TARGET_FILES} --severity-level low --confidence-level low"

# VULTURE - dead code detection
lint_if_available vulture "vulture --min-confidence 80 ${TARGET_FILES}"

# FLAKE8 - traditional linter
lint_if_available flake8 "flake8 --max-line-length=88 --extend-ignore=E203,W503 --max-complexity=10 ${TARGET_FILES}"

# PYCODESTYLE - PEP 8 style checker
lint_if_available pycodestyle "pycodestyle --max-line-length=88 --ignore=E203,W503 ${TARGET_FILES}"

# PYDOCSTYLE - docstring style checker
lint_if_available pydocstyle "pydocstyle --convention=google ${TARGET_FILES}"

# RADON - complexity metrics. Informational only: failures are ignored and
# never affect OVERALL_STATUS.
if check_tool radon; then
    print_subheader "Running radon (complexity analysis)..."
    echo ""
    echo -e "${MAGENTA}Cyclomatic Complexity:${NC}"
    radon cc -a -s ${TARGET_FILES} || true
    echo ""
    echo -e "${MAGENTA}Maintainability Index:${NC}"
    radon mi -s ${TARGET_FILES} || true
    # In report mode both metrics are run a second time, into files.
    if [[ "${REPORT_MODE}" == true ]]; then
        radon_report_dir="${PROJECT_ROOT}/lint-reports"
        radon cc -a -s ${TARGET_FILES} > "${radon_report_dir}/radon-cc.txt" 2>&1 || true
        radon mi -s ${TARGET_FILES} > "${radon_report_dir}/radon-mi.txt" 2>&1 || true
    fi
fi

# INTERROGATE - docstring coverage (threshold disabled via --fail-under=0)
lint_if_available interrogate "interrogate -v --fail-under=0 ${TARGET_FILES}"

# PYRIGHT - Microsoft's type checker (optional, very strict)
lint_if_available pyright "pyright ${TARGET_FILES}"
# ==============================================================================
# AUTOFLAKE - Unused imports/variables (fix mode only)
# ==============================================================================
# TARGET_FILES is intentionally unquoted so each target becomes its own find
# start path. find exits non-zero when a start path is missing or unreadable;
# its status is tested explicitly so that `set -e` cannot abort the script
# before the summary, and so the failure is reported like any other tool's.
if [[ "${FIX_MODE}" == true ]] && check_tool autoflake; then
    print_subheader "Running autoflake (removing unused imports)..."
    if find ${TARGET_FILES} -name "*.py" -type f -exec autoflake --in-place --remove-all-unused-imports --remove-unused-variables {} \;
    then
        print_success "autoflake completed"
    else
        print_error "autoflake could not process every target"
        FAILED_TOOLS+=("autoflake")
        OVERALL_STATUS=1
    fi
fi
# ==============================================================================
# PYUPGRADE - Upgrade Python syntax (fix mode only)
# ==============================================================================
# `-exec ... \;` is kept (rather than `+`) so the exit status of the tool itself
# does not influence find's exit status; only traversal errors are reported.
if [[ "${FIX_MODE}" == true ]] && check_tool pyupgrade; then
    print_subheader "Running pyupgrade (upgrading syntax to Python 3.10+)..."
    if find ${TARGET_FILES} -name "*.py" -type f -exec pyupgrade --py310-plus {} \;
    then
        print_success "pyupgrade completed"
    else
        print_error "pyupgrade could not process every target"
        FAILED_TOOLS+=("pyupgrade")
        OVERALL_STATUS=1
    fi
fi
# ------------------------------------------------------------------------------
# CODESPELL - spell checking; fix mode adds -w to write corrections in place
# ------------------------------------------------------------------------------
if check_tool codespell; then
    codespell_flags="--skip='*.json,*.lock,.git,__pycache__,.venv'"
    if [[ "${FIX_MODE}" == true ]]; then
        codespell_flags="-w ${codespell_flags}"
    fi
    run_tool "codespell" "codespell ${codespell_flags} ${TARGET_FILES}" || OVERALL_STATUS=1
fi

# ------------------------------------------------------------------------------
# Summary - list the failed tools, or confirm a clean run
# ------------------------------------------------------------------------------
print_header "Linting Summary"
echo ""
if [[ ${OVERALL_STATUS} -eq 0 ]]; then
    print_success "All linting checks passed!"
    exit 0
fi

print_error "The following tools reported issues:"
for failed_tool in "${FAILED_TOOLS[@]}"; do
    echo " - ${failed_tool}"
done
echo ""
if [[ "${REPORT_MODE}" == true ]]; then
    print_info "Detailed reports saved to: ${PROJECT_ROOT}/lint-reports/"
fi
print_info "Run with --fix to auto-fix issues where possible"
exit ${OVERALL_STATUS}

308
meta/pyproject.toml Normal file
View File

@ -0,0 +1,308 @@
[project]
name = "testsandmisc"
version = "0.1.0"
description = "Collection of miscellaneous tests and scripts"
requires-python = ">=3.10"
# ============================================================================
# RUFF - Extremely fast Python linter and formatter (written in Rust)
# ============================================================================
[tool.ruff]
target-version = "py310"
# Include all Python files
include = ["*.py", "**/*.py"]
# Exclude vendored/build directories
exclude = [
".git",
".venv",
"__pycache__",
"build",
"dist",
".eggs",
"Bash/ffmpeg-build", # Vendored FFmpeg tools
]
[tool.ruff.lint]
# AGGRESSIVE: Select ALL rules from all categories
select = ["ALL"]
# Ignores for rules that are too strict for this mixed script repository
ignore = [
# D203 vs D211 conflict - we use D211 (no blank line before class docstring)
"D203", # 1 blank line required before class docstring (conflicts with D211)
# D212 vs D213 conflict - we use D212 (summary on first line after """)
"D213", # Multi-line docstring summary should start at second line (conflicts with D212)
# Formatter conflicts - recommended to disable when using ruff format
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
"COM812", # Trailing comma missing - formatter handles this automatically
"ISC001", # Implicit string concatenation - formatter may create these when wrapping
# Security audit - prone to false positives with validated input
# https://github.com/astral-sh/ruff/issues/4045
"S603", # subprocess call without shell - prone to false positives as it is
# difficult to determine whether the passed arguments have been validated
]
# Allow ALL rules to be auto-fixed
fixable = ["ALL"]
unfixable = []
# Per-file ignores — only rules that FUNDAMENTALLY conflict with test code remain.
# Every other rule was fixed in source. See justifications below.
[tool.ruff.lint.per-file-ignores]
"**/tests/**/*.py" = [
"ARG", # @patch decorators inject mock params that aren't always referenced;
# the patch side-effect is needed, not the mock object itself.
"D", # Test names like test_sub_cards_no_answer_text are self-documenting;
# docstrings would be redundant noise on every test method.
"PLC0415", # Test isolation requires importing AFTER mocking sys.modules;
# top-level imports would bypass the mocks entirely.
"PLR2004", # assert count == 5 is clearer than assert count == EXPECTED_COUNT;
# named constants for test expectations add indirection without value.
"S101", # assert IS what tests do — every Python test suite suppresses this.
"SLF001", # Unit tests must exercise private internals (_method, _attr) to reach
# 100% branch coverage; only integration tests can avoid this.
]
"**/test_*.py" = [
"ARG",
"D",
"PLC0415",
"PLR2004",
"S101",
"SLF001",
]
[tool.ruff.lint.pydocstyle]
convention = "google" # Use Google docstring convention
[tool.ruff.lint.isort]
force-single-line = false
force-sort-within-sections = true
known-first-party = ["python_pkg"]
[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
inline-quotes = "double"
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "all"
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"
docstring-code-format = true
# ============================================================================
# MYPY - Static type checker (most aggressive settings)
# ============================================================================
[tool.mypy]
python_version = "3.10"
# Strict mode enables most checks
strict = true
# Additional aggressive settings
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
# Extra strict settings
disallow_any_unimported = true
disallow_any_explicit = false # Too aggressive for practical use
disallow_any_generics = true
disallow_subclassing_any = true
strict_equality = true
extra_checks = true
# Allow missing imports for third-party packages
ignore_missing_imports = true
# Show error codes
show_error_codes = true
# Enable colored output
color_output = true
# Exclude vendored directories
exclude = [
"Bash/ffmpeg-build/",
".venv/",
"linux_configuration/scripts/misc/testsAndMisc-bash/tools/", # Avoid duplicate module named 'tools'
]
# ============================================================================
# PYLINT - Comprehensive Python linter
# ============================================================================
[tool.pylint.main]
# Analyse import fallback blocks
analyse-fallback-blocks = true
# Pickle collected data for later comparisons
persistent = true
# Jobs to use for parallel execution (0 = auto)
jobs = 0
# Minimum Python version
py-version = "3.10"
# Ignore vendored directories
ignore = ["Bash", ".venv", "__pycache__"]
# Ignore patterns
ignore-patterns = [".*\\.pyi$"]
# Allow C extension modules to be introspected
extension-pkg-allow-list = ["cv2", "pygame", "lxml"]
[tool.pylint.messages_control]
# Enable every available check
enable = "all"
# No disabled checks - maximum strictness
disable = []
[tool.pylint.design]
# Mixins and single-entry-point classes may have zero public methods
min-public-methods = 0
# Test modules can be large
max-module-lines = 1000
# UI/mixin classes accumulate attributes across multiple mixins
max-attributes = 10
[tool.pylint.spelling]
# No spelling dictionary to avoid false positives
spelling-dict = ""
[tool.pylint.typecheck]
# cv2 (OpenCV) dynamically loads members from C extension at runtime.
# unittest.mock.MagicMock generates assertion/introspection methods at runtime.
generated-members = [
"cv2.*",
".*\\.assert_called_once_with",
".*\\.assert_called_once",
".*\\.assert_called",
".*\\.assert_not_called",
".*\\.assert_any_call",
".*\\.call_args",
".*\\.call_args_list",
".*\\.call_count",
]
# ============================================================================
# BANDIT - Security linter
# ============================================================================
[tool.bandit]
# Exclude test directories and vendored code
exclude_dirs = ["tests", ".venv", "Bash/ffmpeg-build"]
# ============================================================================
# BLACK & ISORT - Removed (ruff handles formatting and import sorting)
# ============================================================================
# ============================================================================
# PYTEST - Testing framework configuration
# ============================================================================
[tool.pytest.ini_options]
testpaths = ["python_pkg"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-v",
"--strict-markers",
"--strict-config",
"-ra",
"--cov=python_pkg",
"--cov-branch",
"--cov-report=term-missing",
"--cov-report=lcov",
]
filterwarnings = [
"error",
"ignore::DeprecationWarning",
"default::pytest.PytestUnraisableExceptionWarning",
]
# ============================================================================
# COVERAGE - Code coverage configuration
# ============================================================================
[tool.coverage.run]
source = ["python_pkg"]
branch = true
omit = [
"*/__pycache__/*",
"*/tests/*",
"*/.venv/*",
]
[tool.coverage.report]
# Fail under this percentage
fail_under = 100
show_missing = true
skip_covered = false
exclude_lines = [
# Standard exclusions
"pragma: no cover",
# Unreachable defensive code
"raise NotImplementedError",
"raise AssertionError",
# Type checking imports
"if TYPE_CHECKING:",
# Main script entry point
'if __name__ == "__main__":',
]
# Partial branch exclusions for unreachable branches
partial_branches = [
"pragma: no branch",
]
# ============================================================================
# VULTURE - Dead code detection
# ============================================================================
# Note: Vulture uses command-line args, but we can document settings here
# vulture --min-confidence 80 --exclude ".venv,Bash" .
# ============================================================================
# FLAKE8 - Python linter (via Flake8-pyproject for pyproject.toml support)
# ============================================================================
[tool.flake8]
# Maximum line length (matches ruff/black)
max-line-length = 88
# Maximum McCabe complexity (matches ruff C901 threshold)
max-complexity = 10
# Maximum cognitive complexity (flake8-cognitive-complexity)
max-cognitive-complexity = 12
# Maximum function length (flake8-functions)
max-function-length = 20
# Maximum returns/arguments per function
max-returns-amount = 6
max-arguments = 5
# Docstring convention (matches ruff)
docstring-convention = "google"
# Select all error codes
select = ["E", "F", "W", "C", "B", "B950"]
# Extend with plugin codes
extend-select = ["B", "B9", "C4", "SIM", "PT", "TC", "ANN"]
# Ignore rules that conflict with ruff-format or are duplicated
extend-ignore = [
"E501", # Line too long - B950 from bugbear is smarter (allows 10% overflow)
"W503", # Line break before binary operator - contradicts PEP 8 update
"ANN101", # Missing type annotation for self
"ANN102", # Missing type annotation for cls
]
# Exclude directories
exclude = [
".git",
".venv",
"__pycache__",
"build",
"dist",
".eggs",
"Bash/ffmpeg-build",
]
# Per-file ignores
per-file-ignores = [
"**/tests/**/*.py:S101,ANN",
"**/test_*.py:S101,ANN",
]
# ============================================================================
# PYDOCSTYLE - Docstring style checker (ruff handles this, but for standalone)
# ============================================================================
# Configured in ruff.lint.pydocstyle above

79
meta/requirements.txt Normal file
View File

@ -0,0 +1,79 @@
# ==============================================================================
# testsAndMisc — combined runtime + development dependencies
# Install with: pip install -r meta/requirements.txt
# Sorted alphabetically (enforced by pre-commit `requirements-txt-fixer`).
# ==============================================================================
add-trailing-comma>=3.1.0
aiohttp>=3.9
autoflake>=2.2.0
autopep8>=2.0.0
bandit>=1.7.0
beautifulsoup4>=4.0
berserk>=0.13
black>=24.0.0
bottle>=0.12
codespell>=2.2.0
coverage>=7.4.0
darglint>=1.8.0
dead>=1.5.0
docformatter>=1.7.0
fixit>=2.1.0
flake8>=7.0.0
flake8-annotations>=3.0.0
flake8-bandit>=4.1.0
flake8-bugbear>=24.0.0
flake8-comprehensions>=3.14.0
flake8-docstrings>=1.7.0
flake8-eradicate>=1.5.0
flake8-pie>=0.16.0
flake8-print>=5.0.0
flake8-pyi>=24.0.0
flake8-pytest-style>=2.0.0
flake8-return>=1.2.0
flake8-simplify>=0.21.0
genanki>=0.13
geopandas>=1.0
howlongtobeatpy>=1.0
hypothesis>=6.98.0
importlib-metadata>=7.0.0
interrogate>=1.5.0
isort>=5.13.0
lxml>=5.0
matplotlib>=3.0
mccabe>=0.7.0
mitmproxy>=10.0
mypy>=1.8.0
numpy>=1.20
opencv-python>=4.0
pillow>=10.0
pip-audit>=2.6.0
pipdeptree>=2.14.0
pre-commit>=3.6.0
prospector>=1.10.0
pycodestyle>=2.11.0
pydocstyle>=6.3.0
pyflakes>=3.2.0
pygame>=2.0
pylama>=8.4.0
pylint>=3.0.0
pyright>=1.1.350
pytest>=8.0.0
pytest-cov>=4.1.0
pytest-randomly>=3.15.0
pytest-sugar>=1.0.0
pytest-timeout>=2.2.0
pytest-xdist>=3.5.0
python-chess>=1.999
pyupgrade>=3.15.0
radon>=6.0.0
reorder-python-imports>=3.12.0
requests>=2.0
ruff>=0.8.0
safety>=2.3.0
selenium>=4.0
types-python-dateutil>=2.8.0
types-PyYAML>=6.0.0
types-requests>=2.31.0
types-setuptools>=69.0.0
websockets>=13.0

148
meta/run.sh Executable file
View File

@ -0,0 +1,148 @@
#!/bin/bash
# Easy entrypoint for system usage reports and polling script diagnostics.
# Usage:
#   ./run.sh                        # today's report to stdout
#   ./run.sh --date 20260501        # specific day
#   ./run.sh --top 25               # override row count
#   ./run.sh --profile [duration]   # profile polling scripts (default 60s)
#   ./run.sh --diagnose             # find inefficient shell scripts
#   ./run.sh --init-artifacts ...   # bootstrap contract/evidence/session artifacts
#
# Any other args are forwarded to usage_report.py unchanged.
set -euo pipefail

REPORT_REL_PATH="linux_configuration/scripts/system-maintenance/bin/usage_report.py"
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
if [[ ! -f "$SCRIPT_DIR/$REPORT_REL_PATH" && -f "$SCRIPT_DIR/../$REPORT_REL_PATH" ]]; then
  # BASH_SOURCE is not symlink-resolved: through the repo-root symlink the
  # directory above is already the repo root, but when the script is run by
  # its real path (meta/run.sh) it is meta/. Step up one level in that case
  # so every path below stays relative to the repo root.
  SCRIPT_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"
fi
REPORT_SCRIPT="$SCRIPT_DIR/$REPORT_REL_PATH"
ARTIFACT_INIT_SCRIPT="$SCRIPT_DIR/scripts/init_agent_artifacts.sh"

if [[ ! -f "$REPORT_SCRIPT" ]]; then
  echo "Error: usage_report.py not found at: $REPORT_SCRIPT" >&2
  exit 1
fi
if [[ ! -f "$ARTIFACT_INIT_SCRIPT" ]]; then
  echo "Error: init_agent_artifacts.sh not found at: $ARTIFACT_INIT_SCRIPT" >&2
  exit 1
fi
# Profiling mode: trace fork-heavy scripts over time
profile_polling_scripts() {
  # Usage: profile_polling_scripts [duration_seconds]   (default: 60)
  #
  # Attaches strace to this script's own shell (PID $$), following children
  # (-f), for the given duration, then prints up to 20 lines of the captured
  # output that mention execve or clone. Progress messages go to stderr; the
  # matched lines go to stdout.
  local duration="${1:-60}"
  local trace_file

  echo "=== Polling Script Profiler (${duration}s) ===" >&2
  echo "Tracing fork/exec calls in shell scripts..." >&2
  echo "" >&2

  # mktemp creates an unpredictable, exclusively-owned file. A fixed
  # /tmp/polling_trace_$$.txt name could be pre-created or symlinked by
  # another local user before this script writes to it.
  trace_file="$(mktemp)"

  # Best effort: a missing strace, a refused attach or the timeout firing must
  # not abort the script, so the exit status is ignored and whatever was
  # captured (stdout and stderr) is analysed below.
  timeout "$duration" strace -f -e trace=clone,execve -c -p $$ \
    > "$trace_file" 2>&1 || true

  echo "Trace completed. Analyzing results:" >&2
  echo "" >&2

  # Show fork/exec heavy entries; "no match" is a valid outcome under
  # `set -e -o pipefail`, hence `|| true`.
  grep -e "execve" -e "clone" "$trace_file" | head -20 || true

  rm -f "$trace_file"
}
# Helper for --diagnose: grep the *.sh files under $SCRIPT_DIR, print <= 5 hits
_scan_shell_scripts() {
  # All arguments are passed to grep ahead of the search root.
  #   -I                  skip binary files instead of post-filtering "Binary"
  #   --exclude-dir=.git  skip repository metadata; the previous
  #                       `grep -v ".git"` was an unanchored regex that also
  #                       discarded real hits such as "... do git pull ..."
  # "No match" (grep exit 1) and head closing the pipe early are expected, so
  # the pipeline status is deliberately ignored.
  grep -rI --include="*.sh" --exclude-dir=.git "$@" "$SCRIPT_DIR" 2>/dev/null \
    | head -5 || true
}

# Diagnostic mode: find inefficient patterns in shell scripts
diagnose_polling_scripts() {
  # Scans the shell scripts under $SCRIPT_DIR for five polling anti-patterns.
  # Headings and recommendations are written to stderr; matching lines (or,
  # for pattern 4, matching file names) are written to stdout.
  echo "=== Shell Script Efficiency Audit ===" >&2
  echo "" >&2

  # Check for common anti-patterns
  echo "Checking for anti-patterns in shell scripts..." >&2
  echo "" >&2

  # Pattern 1: while true with sleep (no event-driven check)
  echo "1. Polling loops (while true + sleep):" >&2
  _scan_shell_scripts -e "while true\|while :"
  echo "" >&2

  # Pattern 2: $(date +...) calls in loops (fork-heavy)
  echo "2. Excessive date calls (each forks a process):" >&2
  _scan_shell_scripts -e '\$(date'
  echo "" >&2

  # Pattern 3: pgrep/xdotool in loops
  echo "3. Process inspection in loops (pgrep, xdotool):" >&2
  _scan_shell_scripts -e "while.*pgrep\|while.*xdotool\|pgrep.*while"
  echo "" >&2

  # Pattern 4: pipes in hot paths
  # Two stages: list the FILES containing "while true" (-l, NUL-separated so
  # any path survives xargs), then keep those that also pipe into
  # awk/grep/tr. Without -l the first grep emitted "file:matched line" text,
  # which xargs split into bogus file names, so this check never reported
  # anything.
  echo "4. Heavy pipes in polling scripts (| awk, | grep, | tr):" >&2
  grep -rlIZ --include="*.sh" --exclude-dir=.git -e "while true" "$SCRIPT_DIR" 2>/dev/null \
    | xargs -0 -r grep -l -e " | awk" -e " | grep" -e " | tr" 2>/dev/null \
    | head -5 || true
  echo "" >&2

  # Pattern 5: sleep with very short intervals
  echo "5. Aggressive polling (sleep < 1s):" >&2
  _scan_shell_scripts -E -e "sleep 0\.[0-9]|sleep 0[^0-9]"
  echo "" >&2

  echo "=== Recommendations ===" >&2
  echo "1. Replace 'while true + sleep' with event-driven I/O (inotifywait, read -t, etc.)" >&2
  echo "2. Use /proc and /sys instead of forking date, sensors, acpi, etc." >&2
  echo "3. Cache frequently accessed values (e.g., in /tmp state files)" >&2
  echo "4. Use bash builtins: printf %()T instead of date, \${var//} instead of tr, etc." >&2
  echo "5. Use i3blocks interval=persist + event loop instead of polling mode" >&2
  echo "6. Increase polling intervals: 1s → 5s → 10s where acceptable" >&2
}
# Handle special modes
case "${1:-}" in
  --profile)
    profile_polling_scripts "${2:-60}"
    exit 0
    ;;
  --diagnose)
    diagnose_polling_scripts
    exit 0
    ;;
  --init-artifacts)
    # Drop the mode flag and hand the remaining args to the bootstrap script.
    shift
    exec "$ARTIFACT_INIT_SCRIPT" "$@"
    ;;
  --help)
    # Print only the header comment block: every comment line between the
    # shebang and the first non-comment line, with the leading "# " removed.
    # Grepping the whole file for '^# ' printed the Usage line twice and
    # leaked every column-0 implementation comment into the help text.
    awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
    exit 0
    ;;
esac
# Default: run usage_report.py with all remaining args
exec python3 "$REPORT_SCRIPT" "$@"

View File

@ -1,308 +0,0 @@
[project]
name = "testsandmisc"
version = "0.1.0"
description = "Collection of miscellaneous tests and scripts"
requires-python = ">=3.10"
# ============================================================================
# RUFF - Extremely fast Python linter and formatter (written in Rust)
# ============================================================================
[tool.ruff]
target-version = "py310"
# Include all Python files
include = ["*.py", "**/*.py"]
# Exclude vendored/build directories
exclude = [
".git",
".venv",
"__pycache__",
"build",
"dist",
".eggs",
"Bash/ffmpeg-build", # Vendored FFmpeg tools
]
[tool.ruff.lint]
# AGGRESSIVE: Select ALL rules from all categories
select = ["ALL"]
# Ignores for rules that are too strict for this mixed script repository
ignore = [
# D203 vs D211 conflict - we use D211 (no blank line before class docstring)
"D203", # 1 blank line required before class docstring (conflicts with D211)
# D212 vs D213 conflict - we use D212 (summary on first line after """)
"D213", # Multi-line docstring summary should start at second line (conflicts with D212)
# Formatter conflicts - recommended to disable when using ruff format
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
"COM812", # Trailing comma missing - formatter handles this automatically
"ISC001", # Implicit string concatenation - formatter may create these when wrapping
# Security audit - prone to false positives with validated input
# https://github.com/astral-sh/ruff/issues/4045
"S603", # subprocess call without shell - prone to false positives as it is
# difficult to determine whether the passed arguments have been validated
]
# Allow ALL rules to be auto-fixed
fixable = ["ALL"]
unfixable = []
# Per-file ignores — only rules that FUNDAMENTALLY conflict with test code remain.
# Every other rule was fixed in source. See justifications below.
[tool.ruff.lint.per-file-ignores]
"**/tests/**/*.py" = [
"ARG", # @patch decorators inject mock params that aren't always referenced;
# the patch side-effect is needed, not the mock object itself.
"D", # Test names like test_sub_cards_no_answer_text are self-documenting;
# docstrings would be redundant noise on every test method.
"PLC0415", # Test isolation requires importing AFTER mocking sys.modules;
# top-level imports would bypass the mocks entirely.
"PLR2004", # assert count == 5 is clearer than assert count == EXPECTED_COUNT;
# named constants for test expectations add indirection without value.
"S101", # assert IS what tests do — every Python test suite suppresses this.
"SLF001", # Unit tests must exercise private internals (_method, _attr) to reach
# 100% branch coverage; only integration tests can avoid this.
]
"**/test_*.py" = [
"ARG",
"D",
"PLC0415",
"PLR2004",
"S101",
"SLF001",
]
[tool.ruff.lint.pydocstyle]
convention = "google" # Use Google docstring convention
[tool.ruff.lint.isort]
force-single-line = false
force-sort-within-sections = true
known-first-party = ["python_pkg"]
[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
inline-quotes = "double"
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "all"
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"
docstring-code-format = true
# ============================================================================
# MYPY - Static type checker (most aggressive settings)
# ============================================================================
[tool.mypy]
python_version = "3.10"
# Strict mode enables most checks
strict = true
# Additional aggressive settings
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
# Extra strict settings
disallow_any_unimported = true
disallow_any_explicit = false # Too aggressive for practical use
disallow_any_generics = true
disallow_subclassing_any = true
strict_equality = true
extra_checks = true
# Allow missing imports for third-party packages
ignore_missing_imports = true
# Show error codes
show_error_codes = true
# Enable colored output
color_output = true
# Exclude vendored directories
exclude = [
"Bash/ffmpeg-build/",
".venv/",
"linux_configuration/scripts/misc/testsAndMisc-bash/tools/", # Avoid duplicate module named 'tools'
]
# ============================================================================
# PYLINT - Comprehensive Python linter
# ============================================================================
[tool.pylint.main]
# Analyse import fallback blocks
analyse-fallback-blocks = true
# Pickle collected data for later comparisons
persistent = true
# Jobs to use for parallel execution (0 = auto)
jobs = 0
# Minimum Python version
py-version = "3.10"
# Ignore vendored directories
ignore = ["Bash", ".venv", "__pycache__"]
# Ignore patterns
ignore-patterns = [".*\\.pyi$"]
# Allow C extension modules to be introspected
extension-pkg-allow-list = ["cv2", "pygame", "lxml"]
[tool.pylint.messages_control]
# Enable every check; the empty "disable" list below switches nothing off
enable = "all"
# No disabled checks - maximum strictness
disable = []
[tool.pylint.design]
# Mixins and single-entry-point classes may have zero public methods
min-public-methods = 0
# Test modules can be large
max-module-lines = 1000
# UI/mixin classes accumulate attributes across multiple mixins
max-attributes = 10
[tool.pylint.spelling]
# No spelling dictionary to avoid false positives
spelling-dict = ""
[tool.pylint.typecheck]
# cv2 (OpenCV) dynamically loads members from C extension at runtime.
# unittest.mock.MagicMock generates assertion/introspection methods at runtime.
generated-members = [
"cv2.*",
".*\\.assert_called_once_with",
".*\\.assert_called_once",
".*\\.assert_called",
".*\\.assert_not_called",
".*\\.assert_any_call",
".*\\.call_args",
".*\\.call_args_list",
".*\\.call_count",
]
# ============================================================================
# BANDIT - Security linter
# ============================================================================
[tool.bandit]
# Exclude test directories and vendored code
exclude_dirs = ["tests", ".venv", "Bash/ffmpeg-build"]
# ============================================================================
# BLACK & ISORT - Removed (ruff handles formatting and import sorting)
# ============================================================================
# ============================================================================
# PYTEST - Testing framework configuration
# ============================================================================
[tool.pytest.ini_options]
testpaths = ["python_pkg"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-v",
"--strict-markers",
"--strict-config",
"-ra",
"--cov=python_pkg",
"--cov-branch",
"--cov-report=term-missing",
"--cov-report=lcov",
]
filterwarnings = [
"error",
"ignore::DeprecationWarning",
"default::pytest.PytestUnraisableExceptionWarning",
]
# ============================================================================
# COVERAGE - Code coverage configuration
# ============================================================================
[tool.coverage.run]
source = ["python_pkg"]
branch = true
omit = [
"*/__pycache__/*",
"*/tests/*",
"*/.venv/*",
]
[tool.coverage.report]
# Fail under this percentage
fail_under = 100
show_missing = true
skip_covered = false
exclude_lines = [
# Standard exclusions
"pragma: no cover",
# Unreachable defensive code
"raise NotImplementedError",
"raise AssertionError",
# Type checking imports
"if TYPE_CHECKING:",
# Main script entry point
'if __name__ == "__main__":',
]
# Partial branch exclusions for unreachable branches
partial_branches = [
"pragma: no branch",
]
# ============================================================================
# VULTURE - Dead code detection
# ============================================================================
# Note: Vulture uses command-line args, but we can document settings here
# vulture --min-confidence 80 --exclude ".venv,Bash" .
# ============================================================================
# FLAKE8 - Python linter (via Flake8-pyproject for pyproject.toml support)
# ============================================================================
[tool.flake8]
# Maximum line length (matches ruff/black)
max-line-length = 88
# Maximum McCabe complexity (matches ruff C901 threshold)
max-complexity = 10
# Maximum cognitive complexity (flake8-cognitive-complexity)
max-cognitive-complexity = 12
# Maximum function length (flake8-functions)
max-function-length = 20
# Maximum returns/arguments per function
max-returns-amount = 6
max-arguments = 5
# Docstring convention (matches ruff)
docstring-convention = "google"
# Base error-code families to enable (an explicit subset, not every code)
select = ["E", "F", "W", "C", "B", "B950"]
# Extend with plugin codes
extend-select = ["B", "B9", "C4", "SIM", "PT", "TC", "ANN"]
# Ignore rules that conflict with ruff-format or are duplicated
extend-ignore = [
"E501", # Line too long - B950 from bugbear is smarter (allows 10% overflow)
"W503", # Line break before binary operator - contradicts PEP 8 update
"ANN101", # Missing type annotation for self
"ANN102", # Missing type annotation for cls
]
# Exclude directories
exclude = [
".git",
".venv",
"__pycache__",
"build",
"dist",
".eggs",
"Bash/ffmpeg-build",
]
# Per-file ignores
per-file-ignores = [
"**/tests/**/*.py:S101,ANN",
"**/test_*.py:S101,ANN",
]
# ============================================================================
# PYDOCSTYLE - Docstring style checker (ruff handles this, but for standalone)
# ============================================================================
# Configured in ruff.lint.pydocstyle above

1
pyproject.toml Symbolic link
View File

@ -0,0 +1 @@
meta/pyproject.toml

View File

@ -1,206 +0,0 @@
"""Shared geographic data module for Warsaw and Poland Anki generators.
This module handles downloading and caching geographic data from various sources:
- OpenStreetMap via Overpass API
- Geofabrik OSM extracts
- GitHub repositories with pre-processed GeoJSON
All data is cached locally to avoid repeated downloads.
"""
from __future__ import annotations
import shutil
import sys
from python_pkg.geo_data._common import (
CACHE_DIR,
MAX_RETRIES,
MIN_LAKE_AREA_KM2,
MIN_LINE_COORDS,
MIN_PEAK_ELEVATION,
MIN_RING_COORDS,
MIN_RIVER_LENGTH_KM,
OVERPASS_ENDPOINTS,
POLSKA_GEOJSON_BASE,
REQUEST_TIMEOUT,
RETRY_DELAY,
WIKIDATA_SPARQL,
)
from python_pkg.geo_data._poland_admin import (
get_poland_boundary,
get_polish_gminy,
get_polish_powiaty,
get_polish_wojewodztwa,
)
from python_pkg.geo_data._poland_nature import (
get_polish_forests,
get_polish_landscape_parks,
get_polish_mountain_peaks,
get_polish_mountain_ranges,
get_polish_national_parks,
get_polish_nature_reserves,
)
from python_pkg.geo_data._poland_water import (
get_polish_coastal_features,
get_polish_islands,
get_polish_lakes,
get_polish_rivers,
get_polish_unesco_sites,
)
from python_pkg.geo_data._warsaw import (
get_vistula_river,
get_warsaw_boundary,
get_warsaw_bridges,
get_warsaw_districts,
get_warsaw_metro_stations,
get_warsaw_osiedla,
)
from python_pkg.geo_data._warsaw_places import get_warsaw_landmarks, get_warsaw_streets
__all__ = [
"CACHE_DIR",
"MAX_RETRIES",
"MIN_LAKE_AREA_KM2",
"MIN_LINE_COORDS",
"MIN_PEAK_ELEVATION",
"MIN_RING_COORDS",
"MIN_RIVER_LENGTH_KM",
"OVERPASS_ENDPOINTS",
"POLSKA_GEOJSON_BASE",
"REQUEST_TIMEOUT",
"RETRY_DELAY",
"WIKIDATA_SPARQL",
"clear_cache",
"download_all_poland_data",
"download_all_warsaw_data",
"get_poland_boundary",
"get_polish_coastal_features",
"get_polish_forests",
"get_polish_gminy",
"get_polish_islands",
"get_polish_lakes",
"get_polish_landscape_parks",
"get_polish_mountain_peaks",
"get_polish_mountain_ranges",
"get_polish_national_parks",
"get_polish_nature_reserves",
"get_polish_powiaty",
"get_polish_rivers",
"get_polish_unesco_sites",
"get_polish_wojewodztwa",
"get_vistula_river",
"get_warsaw_boundary",
"get_warsaw_bridges",
"get_warsaw_districts",
"get_warsaw_landmarks",
"get_warsaw_metro_stations",
"get_warsaw_osiedla",
"get_warsaw_streets",
]
def download_all_warsaw_data() -> None:
    """Download and cache every Warsaw dataset.

    Intended to be run once to pre-populate the cache. Each step is announced
    on stdout before its loader is called; the loaders' return values are
    discarded.
    """
    # (announcement, loader) pairs, executed strictly in this order.
    steps = (
        ("\n1. Warsaw boundary...\n", get_warsaw_boundary),
        ("\n2. Vistula river...\n", get_vistula_river),
        ("\n3. Warsaw bridges...\n", get_warsaw_bridges),
        ("\n4. Metro stations...\n", get_warsaw_metro_stations),
        ("\n5. Major streets...\n", get_warsaw_streets),
        ("\n6. Landmarks...\n", get_warsaw_landmarks),
        ("\n7. Osiedla...\n", get_warsaw_osiedla),
    )

    sys.stdout.write("Downloading all Warsaw geographic data...\n")
    sys.stdout.write("=" * 60 + "\n")

    for announcement, loader in steps:
        sys.stdout.write(announcement)
        loader()

    sys.stdout.write("\n" + "=" * 60 + "\n")
    sys.stdout.write("All Warsaw data cached successfully!\n")
def download_all_poland_data() -> None:
    """Download and cache the Poland administrative datasets.

    Intended to be run once to pre-populate the cache. Covers województwa,
    powiaty, gminy and the national boundary, in that order; each step is
    announced on stdout before its loader is called.
    """
    # (announcement, loader) pairs, executed strictly in this order.
    steps = (
        ("\n1. Województwa...\n", get_polish_wojewodztwa),
        ("\n2. Powiaty...\n", get_polish_powiaty),
        ("\n3. Gminy (this may take a while)...\n", get_polish_gminy),
        ("\n4. Poland boundary...\n", get_poland_boundary),
    )

    sys.stdout.write("Downloading all Poland geographic data...\n")
    sys.stdout.write("=" * 60 + "\n")

    for announcement, loader in steps:
        sys.stdout.write(announcement)
        loader()

    sys.stdout.write("\n" + "=" * 60 + "\n")
    sys.stdout.write("All Poland data cached successfully!\n")
def clear_cache() -> None:
    """Delete the whole cache directory and report the outcome on stdout."""
    if not CACHE_DIR.exists():
        sys.stdout.write("Cache directory does not exist.\n")
        return

    shutil.rmtree(CACHE_DIR)
    sys.stdout.write("Cache cleared.\n")
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Manage geographic data cache")
    # All options are plain on/off switches; registration order is the order
    # in which they appear in --help.
    for flag, help_text in (
        ("--download-warsaw", "Download all Warsaw data"),
        ("--download-poland", "Download all Poland data"),
        ("--download-all", "Download all data"),
        ("--clear-cache", "Clear cached data"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)

    args = parser.parse_args()

    # Only the first switch that is set (in this precedence order) is acted
    # on, even when several are given on the command line.
    dispatch = (
        (args.clear_cache, (clear_cache,)),
        (args.download_warsaw, (download_all_warsaw_data,)),
        (args.download_poland, (download_all_poland_data,)),
        (args.download_all, (download_all_warsaw_data, download_all_poland_data)),
    )
    chosen = next((actions for enabled, actions in dispatch if enabled), None)

    if chosen is None:
        parser.print_help()
    else:
        for action in chosen:
            action()

View File

@ -1,317 +0,0 @@
"""Common utilities for geographic data operations.
Shared constants, API helpers, and geometry extraction functions used
across the geo_data package.
"""
from __future__ import annotations
import json
from pathlib import Path
import sys
import time
from typing import TYPE_CHECKING
import geopandas as gpd
import requests
from shapely.geometry import (
GeometryCollection,
MultiPolygon,
Polygon,
)
if TYPE_CHECKING:
from typing import Any
# Parent directory of the geo_data package (i.e. python_pkg/)
_PKG_DIR = Path(__file__).resolve().parent.parent
# Shared cache directory for all geo data (python_pkg/geo_cache); created on
# demand by _ensure_cache_dir() and removed wholesale by clear_cache().
CACHE_DIR = _PKG_DIR / "geo_cache"
# Overpass API endpoints (multiple for redundancy).
# _overpass_query() tries them in list order and only moves on to the next
# one after MAX_RETRIES failed attempts on the current one.
# Note: kumi.systems is more reliable, so it's first
OVERPASS_ENDPOINTS = [
"https://overpass.kumi.systems/api/interpreter",
"https://overpass-api.de/api/interpreter",
"https://maps.mail.ru/osm/tools/overpass/api/interpreter",
]
# Base URL of the GitHub repository serving pre-processed GeoJSON files
POLSKA_GEOJSON_BASE = "https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master"
# Wikidata SPARQL endpoint
WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
# Request timeout and retry settings
REQUEST_TIMEOUT = 180 # seconds; passed as `timeout=` to requests.get/post
MAX_RETRIES = 3 # attempts per Overpass endpoint
RETRY_DELAY = 5 # seconds slept (time.sleep) between attempts on one endpoint
# Data thresholds for filtering
MIN_PEAK_ELEVATION = 300 # meters
MIN_LAKE_AREA_KM2 = 0.5 # km²
MIN_RIVER_LENGTH_KM = 10 # km
MIN_LINE_COORDS = 2 # minimum coordinates for a line
MIN_RING_COORDS = 4 # minimum coordinates for a polygon ring (checked before the ring is closed)
def _ensure_cache_dir() -> None:
    """Make sure ``CACHE_DIR`` exists, creating missing parents as needed.

    Calling it when the directory is already present is a no-op.
    """
    CACHE_DIR.mkdir(exist_ok=True, parents=True)
def _extract_polygonal_geometry(
    geom: Polygon | MultiPolygon | GeometryCollection,
) -> Polygon | MultiPolygon | None:
    """Reduce a possibly mixed geometry to its polygonal parts.

    Some OSM data arrives as a GeometryCollection in which polygons are mixed
    with other geometry types; only the Polygon/MultiPolygon members are kept.

    Args:
        geom: Input geometry (Polygon, MultiPolygon, or GeometryCollection).

    Returns:
        ``geom`` itself when it is already polygonal; the single polygonal
        member when a collection holds exactly one; a MultiPolygon built from
        all polygonal members when it holds several; ``None`` when there are
        none or ``geom`` is of any other type.
    """
    if isinstance(geom, Polygon | MultiPolygon):
        return geom
    if not isinstance(geom, GeometryCollection):
        return None

    polygonal_parts = [
        part for part in geom.geoms if isinstance(part, Polygon | MultiPolygon)
    ]
    if not polygonal_parts:
        return None
    if len(polygonal_parts) == 1:
        return polygonal_parts[0]

    # Several polygonal members: unpack nested MultiPolygons so the result is
    # built from plain Polygons only.
    flat_polygons = []
    for part in polygonal_parts:
        flat_polygons.extend([part] if isinstance(part, Polygon) else part.geoms)
    return MultiPolygon(flat_polygons)
def _try_single_request(
    endpoint: str, query: str
) -> tuple[dict[str, Any] | None, Exception | None]:
    """POST one Overpass query to one endpoint and classify the outcome.

    Args:
        endpoint: Overpass API endpoint URL.
        query: Overpass QL query string.

    Returns:
        ``(payload, None)`` when the reply is a JSON object containing an
        ``"elements"`` key, otherwise ``(None, error)``. Exactly one element
        of the tuple is ``None``.
    """
    try:
        sys.stdout.write(f" Querying {endpoint}...\n")
        reply = requests.post(endpoint, data={"data": query}, timeout=REQUEST_TIMEOUT)
        reply.raise_for_status()
        payload = reply.json()
    except (requests.RequestException, requests.Timeout, ValueError) as exc:
        # Transport failure, HTTP error status or a body that is not JSON.
        return None, exc

    # A well-formed Overpass reply is a JSON object with an "elements" list.
    if isinstance(payload, dict) and "elements" in payload:
        return payload, None
    return None, ValueError("Invalid response format")
def _overpass_query(query: str) -> dict[str, Any]:
    """Run an Overpass query, retrying and failing over between endpoints.

    Every endpoint in ``OVERPASS_ENDPOINTS`` gets up to ``MAX_RETRIES``
    attempts, with a ``RETRY_DELAY`` pause between attempts on the same
    endpoint (no pause after an endpoint's last attempt).

    Args:
        query: Overpass QL query string.

    Returns:
        JSON response of the first successful attempt.

    Raises:
        RuntimeError: If all endpoints fail.
    """
    most_recent_failure: Exception | None = None

    for endpoint in OVERPASS_ENDPOINTS:
        for attempt_number in range(1, MAX_RETRIES + 1):
            payload, failure = _try_single_request(endpoint, query)
            if payload is not None:
                return payload

            most_recent_failure = failure
            sys.stdout.write(f" Attempt {attempt_number} failed: {failure}\n")
            if attempt_number < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

    msg = f"All Overpass API endpoints failed. Last error: {most_recent_failure}"
    raise RuntimeError(msg)
def _download_github_geojson(url: str, cache_path: Path) -> gpd.GeoDataFrame:
    """Download GeoJSON from GitHub and cache it.

    When ``cache_path`` already exists it is read and returned without any
    network access.

    Args:
        url: URL to download from (must start with ``http://`` or
            ``https://``).
        cache_path: Path to cache the data.

    Returns:
        GeoDataFrame with the data.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
        requests.HTTPError: If the server answers with an error status.
    """
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write(f"Downloading from {url}...\n")
    if not url.startswith(("http://", "https://")):
        msg = f"Unsupported URL scheme: {url}"
        raise ValueError(msg)

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Reject 4xx/5xx answers explicitly, as the other request helpers in this
    # module do, instead of trying to parse an error page as GeoJSON.
    response.raise_for_status()
    data = response.json()

    # Build the frame BEFORE writing the cache: a payload without "features"
    # must fail here rather than be persisted and re-read on every later call.
    frame = gpd.GeoDataFrame.from_features(data["features"], crs="EPSG:4326")

    _ensure_cache_dir()
    cache_path.write_text(json.dumps(data))
    return frame
def _extract_osiedla_rings(
element: dict[str, Any], min_coords: int
) -> tuple[list[list[tuple[float, float]]], list[list[tuple[float, float]]]]:
"""Extract outer and inner rings from an OSM relation.
Args:
element: OSM relation element.
min_coords: Minimum number of coordinates for a valid ring.
Returns:
Tuple of (outer_rings, inner_rings).
"""
outer_rings: list[list[tuple[float, float]]] = []
inner_rings: list[list[tuple[float, float]]] = []
for member in element.get("members", []):
if "geometry" not in member:
continue
ring = [(p["lon"], p["lat"]) for p in member["geometry"]]
if len(ring) < min_coords:
continue
# Close the ring if not closed
if ring[0] != ring[-1]:
ring.append(ring[0])
if member.get("role") == "outer":
outer_rings.append(ring)
elif member.get("role") == "inner":
inner_rings.append(ring)
return outer_rings, inner_rings
def _build_osiedla_geometry(
outer_rings: list[list[tuple[float, float]]],
inner_rings: list[list[tuple[float, float]]],
) -> dict[str, Any]:
"""Build GeoJSON geometry from outer and inner rings.
Args:
outer_rings: List of outer ring coordinates.
inner_rings: List of inner ring coordinates.
Returns:
GeoJSON geometry dict.
"""
if len(outer_rings) == 1:
return {
"type": "Polygon",
"coordinates": [outer_rings[0], *inner_rings],
}
# Multiple outer rings - create MultiPolygon
# Each polygon in a MultiPolygon is [exterior, hole1, hole2, ...]
return {
"type": "MultiPolygon",
"coordinates": [[ring] for ring in outer_rings],
}
def _extract_polygon_from_element(
    element: dict[str, Any],
) -> dict[str, Any] | None:
    """Build a GeoJSON polygon geometry from an OSM relation or way.

    Relations are assembled from their member rings; ways are treated as a
    single ring, which is closed if necessary.

    Args:
        element: OSM element (relation or way).

    Returns:
        GeoJSON geometry dict, or None when the element is of another type,
        has no usable outer ring, or has fewer than ``MIN_RING_COORDS``
        points.
    """
    element_type = element.get("type")

    if element_type == "relation":
        outer_rings, inner_rings = _extract_osiedla_rings(element, MIN_RING_COORDS)
        return _build_osiedla_geometry(outer_rings, inner_rings) if outer_rings else None

    if element_type != "way" or "geometry" not in element:
        return None

    ring = [(node["lon"], node["lat"]) for node in element["geometry"]]
    if len(ring) < MIN_RING_COORDS:
        return None
    # Close the ring so it is a valid GeoJSON linear ring.
    if ring[-1] != ring[0]:
        ring.append(ring[0])
    return {"type": "Polygon", "coordinates": [ring]}
def _extract_line_from_way(element: dict[str, Any]) -> dict[str, Any] | None:
    """Build a GeoJSON LineString geometry from an OSM way.

    Args:
        element: OSM way element.

    Returns:
        GeoJSON LineString geometry dict, or None when the element is not a
        way, carries no geometry, or has fewer than ``MIN_LINE_COORDS``
        points.
    """
    is_way_with_geometry = element.get("type") == "way" and "geometry" in element
    if not is_way_with_geometry:
        return None

    path = [(node["lon"], node["lat"]) for node in element["geometry"]]
    if len(path) >= MIN_LINE_COORDS:
        return {"type": "LineString", "coordinates": path}
    return None
def _add_area_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""Add area_km2 column to a GeoDataFrame.
Args:
gdf: GeoDataFrame with polygon geometries.
Returns:
GeoDataFrame with area_km2 column added.
"""
if len(gdf) == 0:
return gdf
gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system
gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
return gdf
def _add_length_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""Add length_km column to a GeoDataFrame.
Args:
gdf: GeoDataFrame with line geometries.
Returns:
GeoDataFrame with length_km column added.
"""
if len(gdf) == 0:
return gdf
gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system
gdf["length_km"] = gdf_proj.geometry.length / 1000
return gdf

View File

@ -1,225 +0,0 @@
"""Polish administrative boundary data.
Functions for downloading and caching Polish administrative divisions:
województwa, powiaty, gminy, and the national boundary.
Includes Wikidata integration for population data.
"""
from __future__ import annotations
import contextlib
import json
import sys
from typing import TYPE_CHECKING
import geopandas as gpd
import requests
from python_pkg.geo_data._common import (
CACHE_DIR,
POLSKA_GEOJSON_BASE,
WIKIDATA_SPARQL,
_add_area_column,
_build_osiedla_geometry,
_download_github_geojson,
_ensure_cache_dir,
_extract_osiedla_rings,
_overpass_query,
)
if TYPE_CHECKING:
from typing import Any
def _query_wikidata(query: str) -> list[dict[str, Any]]:
    """Send a SPARQL query to Wikidata and return its result rows.

    Args:
        query: SPARQL query string.

    Returns:
        The ``results.bindings`` list of the JSON reply.
    """
    request_params = {"query": query, "format": "json"}
    reply = requests.get(WIKIDATA_SPARQL, params=request_params, timeout=60)
    reply.raise_for_status()
    payload = reply.json()
    return payload["results"]["bindings"]
def _get_powiaty_population() -> dict[str, int]:
    """Get population data for all Polish powiaty from Wikidata.

    The mapping is cached as JSON in ``CACHE_DIR / "powiaty_population.json"``;
    when that file exists it is returned without querying Wikidata.

    Returns:
        Dictionary mapping powiat name (with "powiat " removed) to population.
    """
    cache_path = CACHE_DIR / "powiaty_population.json"
    if cache_path.exists():
        # The cache is written as UTF-8 below; read it back the same way
        # rather than with the platform's locale encoding.
        return json.loads(cache_path.read_text(encoding="utf-8"))

    # Query Wikidata for all powiaty (Q247073) in Poland (Q36) with population
    # Filter to only current Polish powiaty using country=Poland filter
    query = """
SELECT ?powiat ?powiatLabel ?population WHERE {
?powiat wdt:P31 wd:Q247073.
?powiat wdt:P17 wd:Q36.
?powiat wdt:P1082 ?population.
SERVICE wikibase:label { bd:serviceParam wikibase:language "pl,en". }
}
ORDER BY DESC(?population)
"""
    sys.stdout.write("Fetching powiaty population data from Wikidata...\n")
    results = _query_wikidata(query)

    population_map: dict[str, int] = {}
    for item in results:
        label = item.get("powiatLabel", {}).get("value", "")
        pop = item.get("population", {}).get("value", "0")
        if label and pop:
            # Remove "powiat" prefix if present for matching
            clean_label = label.replace("powiat ", "").strip()
            # Rows whose population is not an integer literal are skipped.
            with contextlib.suppress(ValueError):
                population_map[clean_label] = int(pop)

    _ensure_cache_dir()
    # ensure_ascii=False keeps Polish characters verbatim in the file, so the
    # encoding must be pinned: the locale default may be unable to encode
    # them, or may differ between the machine that writes and the one that
    # reads the cache.
    cache_path.write_text(
        json.dumps(population_map, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    sys.stdout.write(f"Cached population data for {len(population_map)} powiaty.\n")
    return population_map
def get_polish_wojewodztwa() -> gpd.GeoDataFrame:
    """Load the Polish województwa (voivodeships), downloading them if needed.

    Returns:
        GeoDataFrame with województwa boundaries.
    """
    return _download_github_geojson(
        f"{POLSKA_GEOJSON_BASE}/wojewodztwa/wojewodztwa-min.geojson",
        CACHE_DIR / "polish_wojewodztwa.geojson",
    )
def get_polish_powiaty() -> gpd.GeoDataFrame:
    """Load Polish powiaty (counties), ordered by population, largest first.

    Boundaries come from the polska-geojson download; a ``population`` column
    is filled from the Wikidata population map, with 0 for powiaty that
    cannot be matched by name.

    Returns:
        GeoDataFrame with powiat boundaries and population.
    """
    gdf = _download_github_geojson(
        f"{POLSKA_GEOJSON_BASE}/powiaty/powiaty-min.geojson",
        CACHE_DIR / "polish_powiaty.geojson",
    )

    # Get population data from Wikidata
    population_map = _get_powiaty_population()

    # Case-insensitive index for the fallback lookup. setdefault keeps the
    # FIRST entry per lower-cased name, i.e. the one a linear scan over
    # population_map would hit first.
    population_by_lowercase_name: dict[str, int] = {}
    for known_name, count in population_map.items():
        population_by_lowercase_name.setdefault(known_name.lower(), count)

    def get_population(nazwa: str) -> int:
        """Return the population for a powiat name, or 0 when unknown."""
        if not nazwa:
            return 0
        # Remove "powiat " prefix for matching
        stripped_name = nazwa.replace("powiat ", "").strip()
        # Exact match first, then case-insensitive.
        if stripped_name in population_map:
            return population_map[stripped_name]
        return population_by_lowercase_name.get(stripped_name.lower(), 0)

    gdf["population"] = gdf["nazwa"].apply(get_population)

    # Sort by population descending
    ranked = gdf.sort_values("population", ascending=False)
    return ranked.reset_index(drop=True)
def get_polish_gminy() -> gpd.GeoDataFrame:
    """Get Polish gminy (municipalities) from OSM, sorted by area descending.

    The cache file stores only names and geometries, so the area column is
    derived after loading on both the cached and the freshly-fetched path.
    Previously a cache hit returned an unsorted frame without ``area_km2``.

    Returns:
        GeoDataFrame with gminy boundaries.
    """
    cache_path = CACHE_DIR / "polish_gminy.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching gminy data from OSM (this may take a while)...\n")
        # Polish gminy are admin_level=7 in OSM
        query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
relation["boundary"="administrative"]["admin_level"="7"]["name"](area.pl);
out geom;
"""
        data = _overpass_query(query)
        features = []
        seen_names: set[str] = set()
        min_ring_coords = 4
        for element in data.get("elements", []):
            if element.get("type") != "relation":
                continue
            name = element.get("tags", {}).get("name", "")
            # Only the first relation with a given name is kept.
            if not name or name in seen_names:
                continue
            outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
            if not outer_rings:
                continue
            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name},
                    "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
                }
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson))
        sys.stdout.write(f"Cached {len(features)} gminy.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the area column unless the cache already provided one.
    if "area_km2" not in gdf.columns:
        gdf = _add_area_column(gdf)
    if "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_poland_boundary() -> gpd.GeoDataFrame:
    """Return the outline of Poland as a single-geometry GeoDataFrame.

    A cached ``poland_boundary.geojson`` is read when present. Otherwise the
    outline is the union of all województwa, which is then written to the
    cache as GeoJSON.

    Returns:
        GeoDataFrame with Poland boundary.
    """
    cache_path = CACHE_DIR / "poland_boundary.geojson"
    if not cache_path.exists():
        voivodeships = get_polish_wojewodztwa()
        # buffer(0) repairs invalid geometries before they are unioned.
        voivodeships["geometry"] = voivodeships["geometry"].buffer(0)
        outline = voivodeships.union_all()
        country = gpd.GeoDataFrame(geometry=[outline], crs=voivodeships.crs)
        _ensure_cache_dir()
        country.to_file(cache_path, driver="GeoJSON")
        return country
    return gpd.read_file(cache_path)

View File

@ -1,446 +0,0 @@
"""Polish natural land features.
Functions for downloading and caching data about Polish mountains,
national parks, forests, nature reserves, and landscape parks.
"""
from __future__ import annotations
import contextlib
import json
import sys
from typing import TYPE_CHECKING
import geopandas as gpd
from python_pkg.geo_data._common import (
CACHE_DIR,
MIN_PEAK_ELEVATION,
_add_area_column,
_build_osiedla_geometry,
_ensure_cache_dir,
_extract_osiedla_rings,
_extract_polygon_from_element,
_extract_polygonal_geometry,
_overpass_query,
)
if TYPE_CHECKING:
from typing import Any
def get_polish_mountain_peaks() -> gpd.GeoDataFrame:
    """Get Polish mountain peaks, sorted by elevation descending.

    Peaks are read from the cache when it holds usable data; otherwise they
    are fetched from OSM. Only named peaks with a parseable ``ele`` tag of at
    least ``MIN_PEAK_ELEVATION`` are kept, one per name.

    Returns:
        GeoDataFrame with mountain peak points and elevation.

    Raises:
        ValueError: If the OSM response yields no usable peaks. Nothing is
            cached in that case, so the next call retries the download.
    """
    cache_path = CACHE_DIR / "polish_mountain_peaks.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
        # A cache with no "elevation" column (e.g. an empty collection left
        # by an earlier failed run) cannot be sorted; fall through and refetch.
        if "elevation" in gdf.columns:
            return gdf.sort_values("elevation", ascending=False).reset_index(drop=True)
    sys.stdout.write("Fetching mountain peaks data from OSM...\n")
    query = """
[out:json][timeout:120];
area["ISO3166-1"="PL"]->.pl;
(
node["natural"="peak"]["name"]["ele"](area.pl);
);
out;
"""
    data = _overpass_query(query)
    features = []
    seen_names: set[str] = set()
    for element in data.get("elements", []):
        if element.get("type") != "node":
            continue
        tags = element.get("tags", {})
        name = tags.get("name", "")
        ele_str = tags.get("ele", "")
        if not name or not ele_str or name in seen_names:
            continue
        try:
            # Accept decimal commas and trailing text such as "1234 m".
            elevation = float(ele_str.replace(",", ".").split()[0])
        except (ValueError, IndexError):
            # IndexError covers a whitespace-only tag, where split() is empty.
            continue
        if elevation < MIN_PEAK_ELEVATION:
            continue
        seen_names.add(name)
        features.append(
            {
                "type": "Feature",
                "properties": {"name": name, "elevation": elevation},
                "geometry": {
                    "type": "Point",
                    "coordinates": [element["lon"], element["lat"]],
                },
            }
        )
    # Fail before touching the cache so an empty result is never persisted.
    if not features:
        msg = "No mountain peaks found in OSM data"
        raise ValueError(msg)
    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
    sys.stdout.write(f"Cached {len(features)} mountain peaks.\n")
    gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
    return gdf.sort_values("elevation", ascending=False).reset_index(drop=True)
def get_polish_mountain_ranges() -> gpd.GeoDataFrame:
    """Get Polish mountain ranges, sorted by area descending.

    Data is read from the cache or fetched from OSM. On both paths the
    geometries are repaired, reduced to their polygonal parts, and measured.
    Previously a cache hit skipped the area computation and the sort, because
    the cache file stores only names and geometries.

    Returns:
        GeoDataFrame with mountain range polygons.
    """
    cache_path = CACHE_DIR / "polish_mountain_ranges.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching mountain ranges data from OSM...\n")
        query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["natural"="mountain_range"]["name"](area.pl);
way["natural"="mountain_range"]["name"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        features: list[dict[str, Any]] = []
        seen_names: set[str] = set()
        min_ring_coords = 4
        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            if element.get("type") == "relation":
                outer_rings, inner_rings = _extract_osiedla_rings(
                    element, min_ring_coords
                )
                if not outer_rings:
                    continue
                geometry = _build_osiedla_geometry(outer_rings, inner_rings)
            elif element.get("type") == "way" and "geometry" in element:
                coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
                if len(coords) < min_ring_coords:
                    continue
                # Close the ring so the way forms a valid polygon.
                if coords[0] != coords[-1]:
                    coords.append(coords[0])
                geometry = {"type": "Polygon", "coordinates": [coords]}
            else:
                continue
            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} mountain ranges.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Fix invalid geometries from OSM data and extract only polygons
    gdf["geometry"] = gdf.geometry.make_valid()
    gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
    # copy() so the column assignment below targets an independent frame.
    gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty].copy()
    if "area_km2" not in gdf.columns and len(gdf) > 0:
        # Calculate area in km² (EPSG:2180 is the Polish coordinate system)
        gdf_proj = gdf.to_crs("EPSG:2180")
        gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
    if "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_national_parks() -> gpd.GeoDataFrame:
    """Get Polish national parks, sorted by area descending.

    Only relations whose name contains "narodowy" (case-insensitive) are
    kept, one per name. The area column is derived after loading on both the
    cached and the freshly-fetched path; previously a cache hit returned an
    unsorted frame without ``area_km2``.

    Returns:
        GeoDataFrame with national park polygons.
    """
    cache_path = CACHE_DIR / "polish_national_parks.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching national parks data from OSM...\n")
        query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["boundary"="national_park"]["name"](area.pl);
relation["leisure"="nature_reserve"]["name"]["protect_class"="2"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        features = []
        seen_names: set[str] = set()
        min_ring_coords = 4
        for element in data.get("elements", []):
            if element.get("type") != "relation":
                continue
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            # Filter to only include "Park Narodowy" in name. The lowercase
            # test also covers the capitalised spelling.
            if "narodowy" not in name.lower():
                continue
            outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
            if not outer_rings:
                continue
            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name},
                    "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
                }
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} national parks.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    if "area_km2" not in gdf.columns and len(gdf) > 0:
        # Calculate area in km²
        gdf_proj = gdf.to_crs("EPSG:2180")
        gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
    if "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_forests() -> gpd.GeoDataFrame:
    """Get major Polish forests (puszcze), sorted by area descending.

    Only features whose name contains one of "Puszcza", "Bory", "Las " or
    "Lasy " are kept, one per name. The area column is derived after loading
    on both the cached and the freshly-fetched path; previously a cache hit
    returned an unsorted frame without ``area_km2``.

    Returns:
        GeoDataFrame with forest polygons.
    """
    cache_path = CACHE_DIR / "polish_forests.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching forests data from OSM...\n")
        # Query for named forests, especially "Puszcza" type
        query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
(
relation["natural"="wood"]["name"](area.pl);
relation["landuse"="forest"]["name"~"Puszcza|Bory|Las"](area.pl);
way["natural"="wood"]["name"~"Puszcza|Bory"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        forest_keywords = ("Puszcza", "Bory", "Las ", "Lasy ")
        features = []
        seen_names: set[str] = set()
        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            if not any(keyword in name for keyword in forest_keywords):
                continue
            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue
            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} forests.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the area column unless the cache already provided one.
    if "area_km2" not in gdf.columns:
        gdf = _add_area_column(gdf)
    if len(gdf) > 0 and "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_nature_reserves() -> gpd.GeoDataFrame:
    """Get Polish nature reserves, sorted by area descending.

    One feature is kept per name. The area column is derived after loading on
    both the cached and the freshly-fetched path; previously a cache hit
    returned an unsorted frame without ``area_km2``.

    Returns:
        GeoDataFrame with nature reserve polygons.
    """
    cache_path = CACHE_DIR / "polish_nature_reserves.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write(
            "Fetching nature reserves data from OSM (this may take a while)...\n"
        )
        query = """
[out:json][timeout:600];
area["ISO3166-1"="PL"]->.pl;
(
relation["leisure"="nature_reserve"]["name"](area.pl);
way["leisure"="nature_reserve"]["name"](area.pl);
relation["boundary"="protected_area"]["protect_class"="4"]["name"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        features = []
        seen_names: set[str] = set()
        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue
            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} nature reserves.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the area column unless the cache already provided one.
    if "area_km2" not in gdf.columns:
        gdf = _add_area_column(gdf)
    if len(gdf) > 0 and "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_landscape_parks() -> gpd.GeoDataFrame:
    """Get Polish landscape parks, sorted by area descending.

    Data is read from the cache or fetched from OSM. On both paths the
    geometries are repaired, reduced to their polygonal parts, and measured.
    Previously a cache hit skipped the area computation and the sort, because
    the cache file stores only names and geometries.

    Returns:
        GeoDataFrame with landscape park polygons.
    """
    cache_path = CACHE_DIR / "polish_landscape_parks.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching landscape parks data from OSM...\n")
        query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
(
relation["boundary"="protected_area"]["protect_class"="5"]["name"](area.pl);
relation["leisure"="nature_reserve"]["name"~"Park Krajobrazowy"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        features = []
        seen_names: set[str] = set()
        min_ring_coords = 4
        for element in data.get("elements", []):
            if element.get("type") != "relation":
                continue
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
            if not outer_rings:
                continue
            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name},
                    "geometry": _build_osiedla_geometry(outer_rings, inner_rings),
                }
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} landscape parks.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Fix invalid geometries from OSM data and extract only polygons
    gdf["geometry"] = gdf.geometry.make_valid()
    gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
    # Remove any rows where geometry extraction failed; copy() so the column
    # assignment below targets an independent frame.
    gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty].copy()
    if "area_km2" not in gdf.columns and len(gdf) > 0:
        gdf_proj = gdf.to_crs("EPSG:2180")
        gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
    if "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf

View File

@ -1,437 +0,0 @@
"""Polish water features and cultural sites.
Functions for downloading and caching data about Polish lakes, rivers,
islands, coastal features, and UNESCO World Heritage sites.
"""
from __future__ import annotations
import json
import sys
from typing import TYPE_CHECKING
import geopandas as gpd
from python_pkg.geo_data._common import (
CACHE_DIR,
MIN_LAKE_AREA_KM2,
MIN_LINE_COORDS,
MIN_RING_COORDS,
MIN_RIVER_LENGTH_KM,
_add_area_column,
_add_length_column,
_build_osiedla_geometry,
_ensure_cache_dir,
_extract_osiedla_rings,
_extract_polygon_from_element,
_overpass_query,
)
if TYPE_CHECKING:
from typing import Any
def _extract_coastal_geometry(
    element: dict[str, Any],
    natural_type: str,
    line_types: tuple[str, ...],
) -> dict[str, Any] | None:
    """Build a GeoJSON geometry for one coastal OSM element.

    Relations are delegated to ``_extract_polygon_from_element``. Ways become
    a LineString when ``natural_type`` is listed in ``line_types`` and a
    closed Polygon otherwise.

    Args:
        element: OSM element.
        natural_type: The natural= tag value.
        line_types: Tuple of natural types that should be lines.

    Returns:
        GeoJSON geometry dict, or None if extraction fails.
    """
    kind = element.get("type")
    if kind == "relation":
        return _extract_polygon_from_element(element)
    if not (kind == "way" and "geometry" in element):
        return None

    points = [(node["lon"], node["lat"]) for node in element["geometry"]]
    if len(points) < MIN_LINE_COORDS:
        return None
    if natural_type in line_types:
        # Listed types stay open lines.
        return {"type": "LineString", "coordinates": points}
    if len(points) < MIN_RING_COORDS:
        return None
    # Repeat the first point at the end when the way is not already closed.
    if points[0] != points[-1]:
        points = [*points, points[0]]
    return {"type": "Polygon", "coordinates": [points]}
def _extract_river_coords_from_element(
    element: dict[str, Any],
) -> list[list[tuple[float, float]]]:
    """Collect the line segments carried by a river element.

    A way contributes its own geometry; a relation contributes the geometry
    of each of its way members. Segments shorter than ``MIN_LINE_COORDS``
    points are dropped.

    Args:
        element: OSM element (way or relation).

    Returns:
        List of coordinate lists (line segments).
    """
    kind = element.get("type")
    if kind == "way":
        carriers = [element]
    elif kind == "relation":
        carriers = [
            member
            for member in element.get("members", [])
            if member.get("type") == "way"
        ]
    else:
        carriers = []

    segments: list[list[tuple[float, float]]] = []
    for carrier in carriers:
        if "geometry" not in carrier:
            continue
        segment = [(pt["lon"], pt["lat"]) for pt in carrier["geometry"]]
        if len(segment) >= MIN_LINE_COORDS:
            segments.append(segment)
    return segments
def get_polish_lakes() -> gpd.GeoDataFrame:
    """Get Polish lakes, sorted by area descending.

    One feature is kept per name, and lakes not larger than
    ``MIN_LAKE_AREA_KM2`` are dropped. Area, filter and sort are applied on
    both the cached and the freshly-fetched path; previously a cache hit
    returned every cached lake, unsorted and without ``area_km2``.

    Returns:
        GeoDataFrame with lake polygons.
    """
    cache_path = CACHE_DIR / "polish_lakes.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching lakes data from OSM...\n")
        query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
(
relation["natural"="water"]["water"="lake"]["name"](area.pl);
way["natural"="water"]["water"="lake"]["name"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        features = []
        seen_names: set[str] = set()
        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue
            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} lakes.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the area column unless the cache already provided one.
    if "area_km2" not in gdf.columns:
        gdf = _add_area_column(gdf)
    if len(gdf) > 0 and "area_km2" in gdf.columns:
        # Filter to lakes > MIN_LAKE_AREA_KM2 to exclude tiny ponds
        gdf = gdf[gdf["area_km2"] > MIN_LAKE_AREA_KM2]
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_rivers() -> gpd.GeoDataFrame:
    """Get Polish rivers, sorted by length descending.

    Rivers with the same name but in different locations are kept separate
    by grouping on the wikidata ID when present, otherwise on the OSM
    element type and ID. Rivers not longer than ``MIN_RIVER_LENGTH_KM`` are
    dropped. Length, filter and sort are applied on both the cached and the
    freshly-fetched path; previously a cache hit returned every cached
    river, unsorted and without ``length_km``.

    Returns:
        GeoDataFrame with river linestrings.
    """
    cache_path = CACHE_DIR / "polish_rivers.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching rivers data from OSM...\n")
        query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
(
relation["waterway"="river"]["name"](area.pl);
way["waterway"="river"]["name"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        # Group ways by river name AND wikidata ID (or OSM ID for uniqueness)
        # This prevents merging different rivers with the same name
        rivers_by_key: dict[str, list[list[tuple[float, float]]]] = {}
        river_names: dict[str, str] = {}  # key -> display name
        for element in data.get("elements", []):
            tags = element.get("tags", {})
            name = tags.get("name", "")
            if not name:
                continue
            # Use wikidata ID if available, otherwise use element type+id
            wikidata = tags.get("wikidata", "")
            if wikidata:
                key = f"{name}_{wikidata}"
            else:
                key = f"{name}_{element.get('type')}_{element.get('id')}"
            coord_lists = _extract_river_coords_from_element(element)
            if coord_lists:
                rivers_by_key.setdefault(key, []).extend(coord_lists)
                river_names[key] = name
        features = []
        for key, coord_lists in rivers_by_key.items():
            name = river_names[key]
            geometry: dict[str, Any]
            if len(coord_lists) == 1:
                geometry = {"type": "LineString", "coordinates": coord_lists[0]}
            else:
                geometry = {"type": "MultiLineString", "coordinates": coord_lists}
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} rivers.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the length column unless the cache already provided one.
    if "length_km" not in gdf.columns:
        gdf = _add_length_column(gdf)
    if len(gdf) > 0 and "length_km" in gdf.columns:
        gdf = gdf[gdf["length_km"] > MIN_RIVER_LENGTH_KM]
        return gdf.sort_values("length_km", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_islands() -> gpd.GeoDataFrame:
    """Get Polish islands, sorted by area descending.

    Covers OSM ``place=island`` and ``place=islet`` features, one per name.
    The area column is derived after loading on both the cached and the
    freshly-fetched path; previously a cache hit returned an unsorted frame
    without ``area_km2``.

    Returns:
        GeoDataFrame with island polygons.
    """
    cache_path = CACHE_DIR / "polish_islands.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching islands data from OSM...\n")
        query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["place"="island"]["name"](area.pl);
way["place"="island"]["name"](area.pl);
relation["place"="islet"]["name"](area.pl);
way["place"="islet"]["name"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        features = []
        seen_names: set[str] = set()
        for element in data.get("elements", []):
            name = element.get("tags", {}).get("name", "")
            if not name or name in seen_names:
                continue
            geometry = _extract_polygon_from_element(element)
            if geometry is None:
                continue
            seen_names.add(name)
            features.append(
                {"type": "Feature", "properties": {"name": name}, "geometry": geometry}
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} islands.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the area column unless the cache already provided one.
    if "area_km2" not in gdf.columns:
        gdf = _add_area_column(gdf)
    if len(gdf) > 0 and "area_km2" in gdf.columns:
        return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_coastal_features() -> gpd.GeoDataFrame:
    """Get Polish coastal features (peninsulas, spits, cliffs), sorted by length.

    Cliffs, beaches and coastlines are stored as lines; other feature types
    as polygons. One feature is kept per name. The length column is derived
    after loading on both the cached and the freshly-fetched path;
    previously a cache hit returned an unsorted frame without ``length_km``.

    Returns:
        GeoDataFrame with coastal feature geometries.
    """
    cache_path = CACHE_DIR / "polish_coastal_features.geojson"
    if cache_path.exists():
        gdf = gpd.read_file(cache_path)
    else:
        sys.stdout.write("Fetching coastal features data from OSM...\n")
        query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["natural"="peninsula"]["name"](area.pl);
way["natural"="peninsula"]["name"](area.pl);
relation["natural"="spit"]["name"](area.pl);
way["natural"="spit"]["name"](area.pl);
relation["natural"="cliff"]["name"](area.pl);
way["natural"="cliff"]["name"](area.pl);
relation["natural"="coastline"]["name"](area.pl);
way["natural"="beach"]["name"](area.pl);
);
out geom;
"""
        data = _overpass_query(query)
        line_types = ("cliff", "beach", "coastline")
        features = []
        seen_names: set[str] = set()
        for element in data.get("elements", []):
            tags = element.get("tags", {})
            name = tags.get("name", "")
            natural_type = tags.get("natural", "")
            if not name or name in seen_names:
                continue
            geometry = _extract_coastal_geometry(element, natural_type, line_types)
            if geometry is None:
                continue
            seen_names.add(name)
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name, "type": natural_type},
                    "geometry": geometry,
                }
            )
        _ensure_cache_dir()
        geojson = {"type": "FeatureCollection", "features": features}
        cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
        sys.stdout.write(f"Cached {len(features)} coastal features.\n")
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

    # Add the length column unless the cache already provided one.
    if "length_km" not in gdf.columns:
        gdf = _add_length_column(gdf)
    if len(gdf) > 0 and "length_km" in gdf.columns:
        return gdf.sort_values("length_km", ascending=False).reset_index(drop=True)
    return gdf
def get_polish_unesco_sites() -> gpd.GeoDataFrame:
    """Get Polish UNESCO World Heritage Sites.

    Sites are read from the cache when present, otherwise fetched from OSM
    and cached. Nodes become points, relations and ways become polygons, and
    one feature is kept per name. The result is returned in the order the
    features were loaded; no sorting is applied.

    Returns:
        GeoDataFrame with UNESCO site geometries.
    """
    cache_path = CACHE_DIR / "polish_unesco_sites.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching UNESCO sites data from OSM...\n")
    query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["heritage"="world_heritage_site"]["name"](area.pl);
way["heritage"="world_heritage_site"]["name"](area.pl);
node["heritage"="world_heritage_site"]["name"](area.pl);
relation["heritage:operator"="whc"]["name"](area.pl);
way["heritage:operator"="whc"]["name"](area.pl);
node["heritage:operator"="whc"]["name"](area.pl);
);
out geom;
"""
    response = _overpass_query(query)
    min_ring_coords = 4
    known_names: set[str] = set()
    features = []
    for element in response.get("elements", []):
        site_name = element.get("tags", {}).get("name", "")
        if not site_name or site_name in known_names:
            continue

        kind = element.get("type")
        shape: dict[str, Any]
        if kind == "node":
            shape = {
                "type": "Point",
                "coordinates": [element["lon"], element["lat"]],
            }
        elif kind == "relation":
            outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
            if not outer_rings:
                continue
            shape = _build_osiedla_geometry(outer_rings, inner_rings)
        elif kind == "way" and "geometry" in element:
            ring = [(pt["lon"], pt["lat"]) for pt in element["geometry"]]
            if len(ring) < min_ring_coords:
                continue
            # Close the ring when the way does not end where it starts.
            if ring[0] != ring[-1]:
                ring.append(ring[0])
            shape = {"type": "Polygon", "coordinates": [ring]}
        else:
            continue

        known_names.add(site_name)
        features.append(
            {"type": "Feature", "properties": {"name": site_name}, "geometry": shape}
        )

    _ensure_cache_dir()
    collection = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(collection, ensure_ascii=False))
    sys.stdout.write(f"Cached {len(features)} UNESCO sites.\n")
    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")

View File

@ -1,407 +0,0 @@
"""Warsaw geographic data functions.
Functions for downloading and caching Warsaw-specific geographic data:
boundaries, districts, Vistula river, bridges, metro stations, and osiedla.
"""
from __future__ import annotations
import json
import sys
import geopandas as gpd
from shapely.geometry import LineString
from python_pkg.geo_data._common import (
_PKG_DIR,
CACHE_DIR,
_build_osiedla_geometry,
_ensure_cache_dir,
_extract_osiedla_rings,
_overpass_query,
)
def get_warsaw_boundary() -> gpd.GeoDataFrame:
    """Return the Warsaw city boundary.

    Sources are tried in order: the cached ``warsaw_boundary.geojson``, the
    bundled districts file (its "Warszawa" row, or the union of all rows when
    no such row exists), and finally an Overpass query whose result is
    cached.

    Returns:
        GeoDataFrame with Warsaw boundary polygon.
    """
    cache_path = CACHE_DIR / "warsaw_boundary.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    # Try to use districts file first
    districts_path = (
        _PKG_DIR / "anki_decks" / "warsaw_districts" / "warszawa-dzielnice.geojson"
    )
    if districts_path.exists():
        all_areas = gpd.read_file(districts_path)
        city = all_areas[all_areas["name"] == "Warszawa"]
        if len(city) == 0:
            # No dedicated city row: dissolve every geometry into one.
            city = gpd.GeoDataFrame(
                geometry=[all_areas.union_all()], crs=all_areas.crs
            )
        _ensure_cache_dir()
        city.to_file(cache_path, driver="GeoJSON")
        return city

    # Fallback to Overpass query
    sys.stdout.write("Fetching Warsaw boundary from OpenStreetMap...\n")
    query = """
[out:json][timeout:60];
relation["name"="Warszawa"]["admin_level"="6"];
out geom;
"""
    response = _overpass_query(query)
    features = []
    for element in response.get("elements", []):
        if element.get("type") != "relation":
            continue
        # Concatenate the points of every outer member into a single ring.
        ring = [
            (pt["lon"], pt["lat"])
            for member in element.get("members", [])
            if member.get("role") == "outer" and "geometry" in member
            for pt in member["geometry"]
        ]
        if not ring:
            continue
        features.append(
            {
                "type": "Feature",
                "properties": {"name": "Warszawa"},
                "geometry": {"type": "Polygon", "coordinates": [ring]},
            }
        )
    _ensure_cache_dir()
    collection = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(collection))
    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
def get_warsaw_districts() -> gpd.GeoDataFrame:
    """Load Warsaw districts (dzielnice) from the bundled GeoJSON file.

    The row named "Warszawa" is excluded from the result.

    Returns:
        GeoDataFrame with district boundaries.

    Raises:
        FileNotFoundError: If the districts GeoJSON file does not exist.
    """
    districts_path = (
        _PKG_DIR / "anki_decks" / "warsaw_districts" / "warszawa-dzielnice.geojson"
    )
    if not districts_path.exists():
        msg = "Warsaw districts GeoJSON not found"
        raise FileNotFoundError(msg)
    all_areas = gpd.read_file(districts_path)
    only_districts = all_areas["name"] != "Warszawa"
    return all_areas[only_districts].copy()
def get_vistula_river() -> gpd.GeoDataFrame:
    """Return the Vistula (Wisła) river segments inside Warsaw.

    Segments are read from the cache when present, otherwise fetched from
    OSM and cached. Each OSM way with at least two points becomes one
    LineString feature.

    Returns:
        GeoDataFrame with river geometry.
    """
    cache_path = CACHE_DIR / "warsaw_vistula.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)

    sys.stdout.write("Fetching Vistula river data...\n")
    query = """
[out:json][timeout:60];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
(
way["waterway"="river"]["name"="Wisła"](area.warsaw);
);
out geom;
"""
    response = _overpass_query(query)
    min_coords = 2
    features = []
    for element in response.get("elements", []):
        if element.get("type") != "way" or "geometry" not in element:
            continue
        path = [(pt["lon"], pt["lat"]) for pt in element["geometry"]]
        if len(path) < min_coords:
            continue
        features.append(
            {
                "type": "Feature",
                "properties": {"name": "Wisła"},
                "geometry": {"type": "LineString", "coordinates": path},
            }
        )
    _ensure_cache_dir()
    collection = {"type": "FeatureCollection", "features": features}
    cache_path.write_text(json.dumps(collection))
    return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
def get_warsaw_bridges() -> gpd.GeoDataFrame:
    """Get Warsaw bridges over the Vistula.

    Bridges are OSM ways tagged ``bridge=yes`` whose name starts with "Most"
    and which intersect a buffer around the Vistula. Every qualifying way is
    collected and ways sharing a name are merged into one feature.
    Previously only the first way per name was kept, so multi-segment
    bridges were truncated and the merge step never had anything to merge.

    Returns:
        GeoDataFrame with bridge geometries.
    """
    cache_path = CACHE_DIR / "warsaw_bridges.geojson"
    if cache_path.exists():
        return gpd.read_file(cache_path)
    sys.stdout.write("Fetching Warsaw bridges data...\n")
    # First get the Vistula to filter bridges
    vistula = get_vistula_river()
    vistula_union = vistula.union_all()
    vistula_buffer = vistula_union.buffer(0.002)  # ~200m buffer
    # Query for bridges with "Most" in name - smaller query
    query = """
[out:json][timeout:90];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
way["bridge"="yes"]["name"~"^Most"](area.warsaw);
out geom;
"""
    data = _overpass_query(query)
    features = []
    min_coords = 2
    for element in data.get("elements", []):
        if element.get("type") != "way" or "geometry" not in element:
            continue
        name = element.get("tags", {}).get("name", "")
        if not name:
            continue
        coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
        if len(coords) < min_coords:
            continue
        # Check if bridge crosses/is near Vistula. Same-named ways are all
        # kept here; _merge_bridge_segments groups them by name below.
        if LineString(coords).intersects(vistula_buffer):
            features.append(
                {
                    "type": "Feature",
                    "properties": {"name": name, "osm_id": element.get("id")},
                    "geometry": {"type": "LineString", "coordinates": coords},
                }
            )
    # Merge segments of the same bridge
    merged_features = _merge_bridge_segments(features)
    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": merged_features}
    cache_path.write_text(json.dumps(geojson))
    sys.stdout.write(f"Cached {len(merged_features)} bridges.\n")
    return gpd.GeoDataFrame.from_features(merged_features, crs="EPSG:4326")
def _merge_bridge_segments(features: list[dict]) -> list[dict]:
"""Merge bridge segments with the same name.
Args:
features: List of GeoJSON features.
Returns:
List of merged features.
"""
by_name: dict[str, list[list[tuple[float, float]]]] = {}
for feature in features:
name = feature["properties"]["name"]
coords = feature["geometry"]["coordinates"]
if name not in by_name:
by_name[name] = []
by_name[name].append(coords)
merged = []
for name, coord_lists in by_name.items():
if len(coord_lists) == 1:
geom = {"type": "LineString", "coordinates": coord_lists[0]}
else:
geom = {"type": "MultiLineString", "coordinates": coord_lists}
merged.append(
{"type": "Feature", "properties": {"name": name}, "geometry": geom}
)
return merged
def get_warsaw_metro_stations() -> gpd.GeoDataFrame:
    """Load Warsaw metro stations, each labelled with its line.

    The result comes from the on-disk cache when it exists. Otherwise the
    stations are fetched through ``_overpass_query``, written to the cache
    and then returned.

    Returns:
        GeoDataFrame of station points with ``name`` and ``line`` properties;
        ``line`` is ``M1``, ``M2``, ``M1/M2`` or ``?`` for a name found in
        neither hard-coded list.
    """
    cached = CACHE_DIR / "warsaw_metro.geojson"
    if cached.exists():
        return gpd.read_file(cached)

    # Known stations for each line (as of 2024)
    line_one = {
        "Kabaty", "Natolin", "Imielin", "Stokłosy", "Ursynów", "Służew",
        "Wilanowska", "Wierzbno", "Racławicka", "Pole Mokotowskie",
        "Politechnika", "Centrum",
        "Świętokrzyska",  # Also M2
        "Ratusz-Arsenał", "Dworzec Gdański", "Plac Wilsona", "Marymont",
        "Słodowiec", "Stare Bielany", "Wawrzyszew", "Młociny",
    }
    line_two = {
        "Bródno", "Kondratowicza", "Zacisze", "Targówek Mieszkaniowy",
        "Trocka", "Szwedzka", "Dworzec Wileński",
        "Świętokrzyska",  # Also M1
        "Nowy Świat-Uniwersytet", "Centrum Nauki Kopernik",
        "Stadion Narodowy", "Rondo ONZ", "Rondo Daszyńskiego", "Płocka",
        "Młynów", "Księcia Janusza", "Ulrychów", "Bemowo",
    }
    # (listed on M1, listed on M2) -> label stored in the "line" property.
    label_for = {
        (True, True): "M1/M2",
        (True, False): "M1",
        (False, True): "M2",
        (False, False): "?",  # Unknown station
    }

    sys.stdout.write("Fetching metro station data...\n")
    overpass_ql = """
[out:json][timeout:60];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
(
node["railway"="station"]["station"="subway"](area.warsaw);
node["railway"="station"]["network"~"Metro"](area.warsaw);
);
out body;
"""
    payload = _overpass_query(overpass_ql)

    stations = []
    handled: set[str] = set()
    for node in payload.get("elements", []):
        if node.get("type") != "node":
            continue
        station = node.get("tags", {}).get("name", "")
        # Unnamed nodes are skipped; a repeated name keeps its first node only.
        if not station or station in handled:
            continue
        handled.add(station)
        stations.append(
            {
                "type": "Feature",
                "properties": {
                    "name": station,
                    "line": label_for[station in line_one, station in line_two],
                },
                "geometry": {
                    "type": "Point",
                    "coordinates": [node["lon"], node["lat"]],
                },
            }
        )

    _ensure_cache_dir()
    cached.write_text(
        json.dumps({"type": "FeatureCollection", "features": stations})
    )
    sys.stdout.write(f"Cached {len(stations)} metro stations.\n")
    return gpd.GeoDataFrame.from_features(stations, crs="EPSG:4326")
def get_warsaw_osiedla() -> gpd.GeoDataFrame:
    """Load the boundaries of Warsaw osiedla (neighborhoods).

    The cached GeoJSON is returned when present. Otherwise named
    ``admin_level=11`` relations are fetched through ``_overpass_query``,
    converted to features, cached and returned.

    Returns:
        GeoDataFrame with one row per distinct osiedle name.
    """
    cached = CACHE_DIR / "warsaw_osiedla.geojson"
    if cached.exists():
        return gpd.read_file(cached)

    sys.stdout.write("Fetching osiedla data...\n")
    overpass_ql = """
[out:json][timeout:180];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
relation["boundary"="administrative"]["admin_level"="11"]["name"](area.warsaw);
out geom;
"""
    payload = _overpass_query(overpass_ql)

    ring_min_points = 4
    osiedla = []
    handled: set[str] = set()
    for relation in payload.get("elements", []):
        label = relation.get("tags", {}).get("name", "")
        if relation.get("type") != "relation" or not label or label in handled:
            continue
        outers, inners = _extract_osiedla_rings(relation, ring_min_points)
        if not outers:
            # The name is not marked as handled here, so a later relation
            # with the same name can still supply the geometry.
            continue
        handled.add(label)
        osiedla.append(
            {
                "type": "Feature",
                "properties": {"name": label},
                "geometry": _build_osiedla_geometry(outers, inners),
            }
        )

    _ensure_cache_dir()
    cached.write_text(
        json.dumps({"type": "FeatureCollection", "features": osiedla})
    )
    sys.stdout.write(f"Cached {len(osiedla)} osiedla.\n")
    return gpd.GeoDataFrame.from_features(osiedla, crs="EPSG:4326")

View File

@ -1,189 +0,0 @@
"""Warsaw streets, landmarks, and place data.
Functions for downloading and caching Warsaw streets, landmarks,
and other place-related geographic data.
"""
from __future__ import annotations
import json
import sys
import geopandas as gpd
from shapely.geometry import MultiLineString
from python_pkg.geo_data._common import CACHE_DIR, _ensure_cache_dir, _overpass_query
def get_warsaw_streets(min_length: int = 500) -> gpd.GeoDataFrame:
    """Load Warsaw's named primary, secondary and tertiary roads.

    Raw way segments come from the on-disk cache when present; otherwise
    they are fetched through ``_overpass_query`` and cached. Either way the
    segments then go through ``_filter_streets_by_length``.

    Args:
        min_length: Minimum street length in meters, forwarded to
            ``_filter_streets_by_length``.

    Returns:
        The GeoDataFrame produced by ``_filter_streets_by_length``.
    """
    cached = CACHE_DIR / "warsaw_streets.geojson"
    if cached.exists():
        # The cache holds unfiltered segments, so the filter runs each call.
        return _filter_streets_by_length(gpd.read_file(cached), min_length)

    sys.stdout.write("Fetching street data from OpenStreetMap...\n")
    overpass_ql = """
[out:json][timeout:120];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
(
way["highway"="primary"]["name"](area.warsaw);
way["highway"="secondary"]["name"](area.warsaw);
way["highway"="tertiary"]["name"](area.warsaw);
);
out geom;
"""
    payload = _overpass_query(overpass_ql)

    segments = []
    for way in payload.get("elements", []):
        if way.get("type") != "way" or "geometry" not in way:
            continue
        points = [(node["lon"], node["lat"]) for node in way["geometry"]]
        if len(points) < 2:  # a line needs at least two points
            continue
        tags = way.get("tags", {})
        segments.append(
            {
                "type": "Feature",
                "properties": {
                    "name": tags.get("name", "Unknown"),
                    "highway": tags.get("highway", ""),
                },
                "geometry": {"type": "LineString", "coordinates": points},
            }
        )

    _ensure_cache_dir()
    cached.write_text(
        json.dumps({"type": "FeatureCollection", "features": segments})
    )
    sys.stdout.write(f"Cached {len(segments)} street segments.\n")
    return _filter_streets_by_length(
        gpd.GeoDataFrame.from_features(segments, crs="EPSG:4326"), min_length
    )
def _filter_streets_by_length(
    gdf: gpd.GeoDataFrame, min_length: int
) -> gpd.GeoDataFrame:
    """Merge street segments by name and keep streets of at least min_length.

    Args:
        gdf: GeoDataFrame of street segments with a ``name`` column; rows
            whose name is empty or ``"Unknown"`` are ignored.
        min_length: Minimum length in meters.

    Returns:
        GeoDataFrame with ``name``, ``geometry`` and ``length_m`` columns,
        sorted by length (longest first). It has no CRS when no street
        qualifies.
    """
    # Group segment geometries by street name (first-seen order).
    segments_by_name: dict[str, list] = {}
    for _, row in gdf.iterrows():
        name = row.get("name", "Unknown")
        if name and name != "Unknown":
            segments_by_name.setdefault(name, []).append(row.geometry)

    names = list(segments_by_name)
    merged = [
        parts[0] if len(parts) == 1 else MultiLineString(parts)
        for parts in segments_by_name.values()
    ]

    result_rows = []
    if merged:
        # Reproject every street in one batch instead of building and
        # transforming a one-row GeoDataFrame per street.
        lengths = (
            gpd.GeoSeries(merged, crs="EPSG:4326")
            .to_crs("EPSG:2180")  # Polish coordinate system
            .length
        )
        result_rows = [
            {"name": name, "geometry": geometry, "length_m": length}
            for name, geometry, length in zip(names, merged, lengths)
            if length >= min_length
        ]

    # Sort by length (longest first)
    result_rows.sort(key=lambda entry: entry["length_m"], reverse=True)
    return gpd.GeoDataFrame(
        result_rows,
        crs="EPSG:4326" if result_rows else None,
    )
def get_warsaw_landmarks() -> gpd.GeoDataFrame:
    """Load named Warsaw museums, tourist attractions and monuments as points.

    The cached GeoJSON is returned when present. Otherwise the data is
    fetched through ``_overpass_query``, cached and returned. Ways are
    represented by the ``center`` that the query asks Overpass to emit.

    Returns:
        GeoDataFrame with ``name`` and ``type`` properties and point
        geometry; an empty frame with those columns when nothing was found.
    """
    cached = CACHE_DIR / "warsaw_landmarks.geojson"
    if cached.exists():
        return gpd.read_file(cached)

    sys.stdout.write("Fetching landmark data...\n")
    # Simplified query - just museums and major attractions
    overpass_ql = """
[out:json][timeout:60];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
(
node["tourism"="museum"]["name"](area.warsaw);
node["tourism"="attraction"]["name"](area.warsaw);
node["historic"="monument"]["name"](area.warsaw);
way["tourism"="museum"]["name"](area.warsaw);
way["tourism"="attraction"]["name"](area.warsaw);
);
out center;
"""
    payload = _overpass_query(overpass_ql)

    landmarks = []
    handled: set[str] = set()
    for item in payload.get("elements", []):
        tags = item.get("tags", {})
        label = tags.get("name", "")
        if not label or label in handled:
            continue

        # Nodes carry their own position; other elements need a "center".
        if item.get("type") == "node":
            position = [item["lon"], item["lat"]]
        elif "center" in item:
            position = [item["center"]["lon"], item["center"]["lat"]]
        else:
            continue

        handled.add(label)
        category = (
            tags.get("tourism")
            or tags.get("historic")
            or tags.get("leisure")
            or "landmark"
        )
        landmarks.append(
            {
                "type": "Feature",
                "properties": {"name": label, "type": category},
                "geometry": {"type": "Point", "coordinates": position},
            }
        )

    _ensure_cache_dir()
    cached.write_text(
        json.dumps({"type": "FeatureCollection", "features": landmarks})
    )
    sys.stdout.write(f"Cached {len(landmarks)} landmarks.\n")

    if landmarks:
        return gpd.GeoDataFrame.from_features(landmarks, crs="EPSG:4326")
    return gpd.GeoDataFrame(
        {"name": [], "type": [], "geometry": []}, crs="EPSG:4326"
    )

View File

@ -1,487 +0,0 @@
"""Tests for python_pkg.geo_data._common module."""
from __future__ import annotations
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from shapely.geometry import (
GeometryCollection,
LineString,
MultiPolygon,
Point,
Polygon,
)
from python_pkg.geo_data._common import (
_build_osiedla_geometry,
_download_github_geojson,
_ensure_cache_dir,
_extract_line_from_way,
_extract_osiedla_rings,
_extract_polygon_from_element,
_extract_polygonal_geometry,
_overpass_query,
_try_single_request,
)
class TestEnsureCacheDir:
    """Checks for ``_ensure_cache_dir``."""

    def test_creates_directory(self) -> None:
        """``Path.mkdir`` is called once with ``parents`` and ``exist_ok``."""
        with patch.object(Path, "mkdir") as mkdir_spy:
            _ensure_cache_dir()
        mkdir_spy.assert_called_once_with(parents=True, exist_ok=True)
class TestExtractPolygonalGeometry:
    """Checks for ``_extract_polygonal_geometry``."""

    @staticmethod
    def _square(origin: int) -> Polygon:
        """Build a 1x1 square whose lower-left corner is (origin, origin)."""
        far = origin + 1
        return Polygon(
            [(origin, origin), (far, origin), (far, far), (origin, far)]
        )

    @staticmethod
    def _diagonal() -> LineString:
        """Build the non-polygonal filler geometry used in collections."""
        return LineString([(0, 0), (1, 1)])

    def test_polygon_returned_directly(self) -> None:
        """A Polygon comes back as the same object."""
        square = self._square(0)
        assert _extract_polygonal_geometry(square) is square

    def test_multipolygon_returned_directly(self) -> None:
        """A MultiPolygon comes back as the same object."""
        multi = MultiPolygon([self._square(0), self._square(2)])
        assert _extract_polygonal_geometry(multi) is multi

    def test_geometry_collection_single_polygon(self) -> None:
        """A collection with one polygon yields a geometry equal to it."""
        square = self._square(0)
        mixed = GeometryCollection([square, self._diagonal()])
        extracted = _extract_polygonal_geometry(mixed)
        assert extracted is not None
        assert extracted.equals(square)

    def test_geometry_collection_multiple_polygons(self) -> None:
        """A collection with two polygons yields a MultiPolygon."""
        mixed = GeometryCollection(
            [self._square(0), self._square(2), self._diagonal()]
        )
        assert isinstance(_extract_polygonal_geometry(mixed), MultiPolygon)

    def test_geometry_collection_with_multipolygon(self) -> None:
        """A polygon plus a MultiPolygon in a collection yields a MultiPolygon."""
        nested = MultiPolygon([self._square(2), self._square(4)])
        mixed = GeometryCollection([self._square(0), nested])
        assert isinstance(_extract_polygonal_geometry(mixed), MultiPolygon)

    def test_geometry_collection_no_polygons(self) -> None:
        """A collection without polygons yields ``None``."""
        lines_only = GeometryCollection([self._diagonal()])
        assert _extract_polygonal_geometry(lines_only) is None

    def test_unsupported_geometry_type(self) -> None:
        """A bare Point yields ``None``."""
        assert _extract_polygonal_geometry(Point(0, 0)) is None
class TestTrySingleRequest:
    """Checks for ``_try_single_request``."""

    ENDPOINT = "http://example.com"

    @staticmethod
    def _reply_with(payload: Any) -> MagicMock:
        """Build a fake HTTP response whose ``json()`` returns ``payload``."""
        reply = MagicMock()
        reply.json.return_value = payload
        return reply

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_successful_request(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A dict with ``elements`` is returned together with no error."""
        mock_post.return_value = self._reply_with({"elements": []})
        data, failure = _try_single_request(self.ENDPOINT, "query")
        assert data == {"elements": []}
        assert failure is None

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_request_exception(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A ``RequestException`` from ``post`` is returned, not raised."""
        import requests

        mock_post.side_effect = requests.RequestException("fail")
        data, failure = _try_single_request(self.ENDPOINT, "query")
        assert data is None
        assert isinstance(failure, requests.RequestException)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_invalid_response_format(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A dict without ``elements`` is reported as a ``ValueError``."""
        mock_post.return_value = self._reply_with({"no_elements": True})
        data, failure = _try_single_request(self.ENDPOINT, "query")
        assert data is None
        assert isinstance(failure, ValueError)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_non_dict_response(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A JSON list instead of a dict is reported as a ``ValueError``."""
        mock_post.return_value = self._reply_with([1, 2, 3])
        data, failure = _try_single_request(self.ENDPOINT, "query")
        assert data is None
        assert isinstance(failure, ValueError)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_value_error_on_json_parse(
        self, mock_stdout: MagicMock, mock_post: MagicMock
    ) -> None:
        """A ``ValueError`` raised by ``json()`` is returned, not raised."""
        reply = MagicMock()
        reply.json.side_effect = ValueError("bad json")
        mock_post.return_value = reply
        data, failure = _try_single_request(self.ENDPOINT, "query")
        assert data is None
        assert isinstance(failure, ValueError)

    @patch("python_pkg.geo_data._common.requests.post")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_timeout_error(self, mock_stdout: MagicMock, mock_post: MagicMock) -> None:
        """A ``Timeout`` from ``post`` is returned, not raised."""
        import requests

        mock_post.side_effect = requests.Timeout("timeout")
        data, failure = _try_single_request(self.ENDPOINT, "query")
        assert data is None
        assert isinstance(failure, requests.Timeout)
class TestOverpassQuery:
    """Checks for ``_overpass_query``."""

    @patch("python_pkg.geo_data._common._try_single_request")
    def test_success_on_first_try(self, mock_req: MagicMock) -> None:
        """The data of a request that succeeds immediately is returned."""
        expected = {"elements": []}
        mock_req.return_value = (expected, None)
        assert _overpass_query("query") == {"elements": []}

    @patch("python_pkg.geo_data._common.time.sleep")
    @patch("python_pkg.geo_data._common._try_single_request")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_retries_then_succeeds(
        self, mock_stdout: MagicMock, mock_req: MagicMock, mock_sleep: MagicMock
    ) -> None:
        """A failed attempt followed by a good one still returns the data."""
        failed_attempt = (None, ValueError("fail1"))
        good_attempt = ({"elements": []}, None)
        mock_req.side_effect = [failed_attempt, good_attempt]
        assert _overpass_query("query") == {"elements": []}

    @patch("python_pkg.geo_data._common.time.sleep")
    @patch("python_pkg.geo_data._common._try_single_request")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_all_endpoints_fail(
        self, mock_stdout: MagicMock, mock_req: MagicMock, mock_sleep: MagicMock
    ) -> None:
        """When every attempt fails a ``RuntimeError`` is raised."""
        mock_req.return_value = (None, ValueError("fail"))
        with pytest.raises(RuntimeError, match="All Overpass API endpoints failed"):
            _overpass_query("query")
class TestDownloadGithubGeojson:
    """Checks for ``_download_github_geojson``."""

    @staticmethod
    def _cache_stub(*, present: bool) -> MagicMock:
        """Build a fake cache path whose ``exists()`` returns ``present``."""
        stub = MagicMock()
        stub.exists.return_value = present
        return stub

    @patch("python_pkg.geo_data._common.gpd.read_file")
    def test_cached_file_exists(self, mock_read: MagicMock) -> None:
        """An existing cache file is read and its frame returned."""
        cached_frame = MagicMock()
        mock_read.return_value = cached_frame
        cache_file = self._cache_stub(present=True)

        loaded = _download_github_geojson(
            "http://example.com/data.geojson", cache_file
        )

        assert loaded is cached_frame
        mock_read.assert_called_once_with(cache_file)

    @patch("python_pkg.geo_data._common.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._common._ensure_cache_dir")
    @patch("python_pkg.geo_data._common.requests.get")
    @patch("python_pkg.geo_data._common.sys.stdout")
    def test_downloads_and_caches(
        self,
        mock_stdout: MagicMock,
        mock_get: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Without a cache file the frame built from the download is returned."""
        downloaded: dict[str, Any] = {
            "features": [
                {
                    "type": "Feature",
                    "properties": {"name": "test"},
                    "geometry": {"type": "Point", "coordinates": [0, 0]},
                }
            ]
        }
        reply = MagicMock()
        reply.json.return_value = downloaded
        mock_get.return_value = reply
        built_frame = MagicMock()
        mock_from_features.return_value = built_frame

        loaded = _download_github_geojson(
            "https://example.com/data.geojson", self._cache_stub(present=False)
        )

        assert loaded is built_frame

    def test_unsupported_url_scheme(self) -> None:
        """An ``ftp://`` URL is rejected with a ``ValueError``."""
        cache_file = self._cache_stub(present=False)
        with pytest.raises(ValueError, match="Unsupported URL scheme"):
            _download_github_geojson("ftp://example.com/data", cache_file)
class TestExtractOsiedlaRings:
    """Checks for ``_extract_osiedla_rings``."""

    MIN_POINTS = 4

    @staticmethod
    def _member(role: str, *points: tuple[float, float]) -> dict[str, Any]:
        """Build a relation member from ``(lon, lat)`` pairs."""
        return {
            "role": role,
            "geometry": [{"lon": lon, "lat": lat} for lon, lat in points],
        }

    def test_outer_and_inner_rings(self) -> None:
        """One outer and one inner member produce one ring of each kind."""
        relation: dict[str, Any] = {
            "members": [
                self._member("outer", (0, 0), (1, 0), (1, 1), (0, 1)),
                self._member(
                    "inner", (0.2, 0.2), (0.4, 0.2), (0.4, 0.4), (0.2, 0.4)
                ),
            ]
        }
        outers, inners = _extract_osiedla_rings(relation, self.MIN_POINTS)
        assert len(outers) == 1
        assert len(inners) == 1

    def test_ring_too_short(self) -> None:
        """A two-point outer member is dropped."""
        relation: dict[str, Any] = {
            "members": [self._member("outer", (0, 0), (1, 0))]
        }
        outers, inners = _extract_osiedla_rings(relation, self.MIN_POINTS)
        assert len(outers) == 0
        assert len(inners) == 0

    def test_no_geometry_in_member(self) -> None:
        """A member without a ``geometry`` key is dropped."""
        relation: dict[str, Any] = {"members": [{"role": "outer"}]}
        outers, inners = _extract_osiedla_rings(relation, self.MIN_POINTS)
        assert len(outers) == 0
        assert len(inners) == 0

    def test_already_closed_ring(self) -> None:
        """A ring whose last point equals its first is kept closed."""
        relation: dict[str, Any] = {
            "members": [self._member("outer", (0, 0), (1, 0), (1, 1), (0, 0))]
        }
        outers, _ = _extract_osiedla_rings(relation, self.MIN_POINTS)
        assert len(outers) == 1
        # Already closed, so no extra point
        ring = outers[0]
        assert ring[0] == ring[-1]

    def test_no_members(self) -> None:
        """An element without ``members`` produces no rings."""
        relation: dict[str, Any] = {}
        outers, inners = _extract_osiedla_rings(relation, self.MIN_POINTS)
        assert len(outers) == 0
        assert len(inners) == 0

    def test_unknown_role_ignored(self) -> None:
        """A member with role ``label`` contributes to neither list."""
        relation: dict[str, Any] = {
            "members": [self._member("label", (0, 0), (1, 0), (1, 1), (0, 1))]
        }
        outers, inners = _extract_osiedla_rings(relation, self.MIN_POINTS)
        assert len(outers) == 0
        assert len(inners) == 0
class TestBuildOsiedlaGeometry:
    """Checks for ``_build_osiedla_geometry``."""

    def test_single_outer_ring(self) -> None:
        """One outer ring and no holes gives a ``Polygon``."""
        shells = [[(0, 0), (1, 0), (1, 1), (0, 0)]]
        holes: list[list[tuple[float, float]]] = []
        geometry = _build_osiedla_geometry(shells, holes)
        assert geometry["type"] == "Polygon"

    def test_single_outer_with_inner(self) -> None:
        """One outer ring plus one hole gives a two-ring ``Polygon``."""
        shells = [[(0, 0), (1, 0), (1, 1), (0, 0)]]
        holes = [[(0.2, 0.2), (0.4, 0.2), (0.4, 0.4), (0.2, 0.2)]]
        geometry = _build_osiedla_geometry(shells, holes)
        assert geometry["type"] == "Polygon"
        assert len(geometry["coordinates"]) == 2

    def test_multiple_outer_rings(self) -> None:
        """Two outer rings give a ``MultiPolygon``."""
        shells = [
            [(0, 0), (1, 0), (1, 1), (0, 0)],
            [(2, 2), (3, 2), (3, 3), (2, 2)],
        ]
        holes: list[list[tuple[float, float]]] = []
        geometry = _build_osiedla_geometry(shells, holes)
        assert geometry["type"] == "MultiPolygon"
class TestExtractPolygonFromElement:
    """Checks for ``_extract_polygon_from_element``."""

    @staticmethod
    def _nodes(*points: tuple[float, float]) -> list[dict[str, float]]:
        """Turn ``(lon, lat)`` pairs into Overpass-style geometry entries."""
        return [{"lon": lon, "lat": lat} for lon, lat in points]

    def test_relation_with_rings(self) -> None:
        """A relation with a usable outer member gives a ``Polygon``."""
        relation: dict[str, Any] = {
            "type": "relation",
            "members": [
                {
                    "role": "outer",
                    "geometry": self._nodes((0, 0), (1, 0), (1, 1), (0, 1)),
                }
            ],
        }
        geometry = _extract_polygon_from_element(relation)
        assert geometry is not None
        assert geometry["type"] == "Polygon"

    def test_relation_without_outer_rings(self) -> None:
        """A relation holding only an inner member gives ``None``."""
        relation: dict[str, Any] = {
            "type": "relation",
            "members": [{"role": "inner", "geometry": self._nodes((0, 0))}],
        }
        assert _extract_polygon_from_element(relation) is None

    def test_way_with_enough_coords(self) -> None:
        """An open four-point way gives a ``Polygon`` with a closed ring."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._nodes((0, 0), (1, 0), (1, 1), (0, 1)),
        }
        geometry = _extract_polygon_from_element(way)
        assert geometry is not None
        assert geometry["type"] == "Polygon"
        # Should close the ring
        shell = geometry["coordinates"][0]
        assert shell[0] == shell[-1]

    def test_way_already_closed(self) -> None:
        """A way whose last point equals its first still gives a geometry."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._nodes((0, 0), (1, 0), (1, 1), (0, 0)),
        }
        assert _extract_polygon_from_element(way) is not None

    def test_way_too_few_coords(self) -> None:
        """A two-point way gives ``None``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": self._nodes((0, 0), (1, 0)),
        }
        assert _extract_polygon_from_element(way) is None

    def test_way_no_geometry(self) -> None:
        """A way without a ``geometry`` key gives ``None``."""
        way: dict[str, Any] = {"type": "way"}
        assert _extract_polygon_from_element(way) is None

    def test_unknown_type(self) -> None:
        """A node element gives ``None``."""
        node: dict[str, Any] = {"type": "node"}
        assert _extract_polygon_from_element(node) is None
class TestExtractLineFromWay:
    """Checks for ``_extract_line_from_way``."""

    def test_valid_way(self) -> None:
        """A two-point way gives a ``LineString``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
        }
        geometry = _extract_line_from_way(way)
        assert geometry is not None
        assert geometry["type"] == "LineString"

    def test_too_few_coords(self) -> None:
        """A one-point way gives ``None``."""
        way: dict[str, Any] = {
            "type": "way",
            "geometry": [{"lon": 0, "lat": 0}],
        }
        assert _extract_line_from_way(way) is None

    def test_not_a_way(self) -> None:
        """A node element gives ``None``."""
        node: dict[str, Any] = {"type": "node"}
        assert _extract_line_from_way(node) is None

    def test_way_no_geometry(self) -> None:
        """A way without a ``geometry`` key gives ``None``."""
        way: dict[str, Any] = {"type": "way"}
        assert _extract_line_from_way(way) is None

View File

@ -1,54 +0,0 @@
"""Tests for _add_area_column and _add_length_column (non-empty GDFs)."""
from __future__ import annotations
import geopandas as gpd
from shapely.geometry import LineString, Polygon
from python_pkg.geo_data._common import _add_area_column, _add_length_column
class TestAddAreaColumnNonEmpty:
    """Checks for ``_add_area_column`` on a populated GeoDataFrame."""

    def test_adds_area_column(self) -> None:
        """A one-polygon frame gains a positive ``area_km2`` value."""
        cell = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        frame = gpd.GeoDataFrame(
            {"name": ["A"]}, geometry=[cell], crs="EPSG:4326"
        )
        enriched = _add_area_column(frame)
        assert "area_km2" in enriched.columns
        assert enriched["area_km2"].iloc[0] > 0
class TestAddLengthColumnNonEmpty:
    """Checks for ``_add_length_column`` on a populated GeoDataFrame."""

    def test_adds_length_column(self) -> None:
        """A one-line frame gains a positive ``length_km`` value."""
        diagonal = LineString([(20, 50), (21, 51)])
        frame = gpd.GeoDataFrame(
            {"name": ["A"]}, geometry=[diagonal], crs="EPSG:4326"
        )
        enriched = _add_length_column(frame)
        assert "length_km" in enriched.columns
        assert enriched["length_km"].iloc[0] > 0
class TestAddAreaColumnEmpty:
    """Checks for ``_add_area_column`` on an empty GeoDataFrame."""

    def test_returns_empty_gdf(self) -> None:
        """An empty frame stays empty."""
        empty_frame = gpd.GeoDataFrame({"name": [], "geometry": []})
        assert len(_add_area_column(empty_frame)) == 0
class TestAddLengthColumnEmpty:
    """Checks for ``_add_length_column`` on an empty GeoDataFrame."""

    def test_returns_empty_gdf(self) -> None:
        """An empty frame stays empty."""
        empty_frame = gpd.GeoDataFrame({"name": [], "geometry": []})
        assert len(_add_length_column(empty_frame)) == 0

View File

@ -1,85 +0,0 @@
"""Tests for python_pkg.geo_data.__init__ module."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
from python_pkg.geo_data import (
clear_cache,
download_all_poland_data,
download_all_warsaw_data,
)
class TestDownloadAllWarsawData:
    """Checks for ``download_all_warsaw_data``."""

    def test_calls_all_warsaw_functions(self) -> None:
        """Each Warsaw fetcher is called exactly once."""
        fetchers = {
            fetcher_name: MagicMock(name=fetcher_name)
            for fetcher_name in (
                "get_warsaw_boundary",
                "get_vistula_river",
                "get_warsaw_bridges",
                "get_warsaw_metro_stations",
                "get_warsaw_streets",
                "get_warsaw_landmarks",
                "get_warsaw_osiedla",
            )
        }
        with (
            patch("python_pkg.geo_data.sys.stdout"),
            patch.multiple("python_pkg.geo_data", **fetchers),
        ):
            download_all_warsaw_data()

        for fetcher in fetchers.values():
            fetcher.assert_called_once()
class TestDownloadAllPolandData:
    """Checks for ``download_all_poland_data``."""

    def test_calls_all_poland_functions(self) -> None:
        """Each Poland fetcher is called exactly once."""
        fetchers = {
            fetcher_name: MagicMock(name=fetcher_name)
            for fetcher_name in (
                "get_polish_wojewodztwa",
                "get_polish_powiaty",
                "get_polish_gminy",
                "get_poland_boundary",
            )
        }
        with (
            patch("python_pkg.geo_data.sys.stdout"),
            patch.multiple("python_pkg.geo_data", **fetchers),
        ):
            download_all_poland_data()

        for fetcher in fetchers.values():
            fetcher.assert_called_once()
class TestClearCache:
    """Tests for clear_cache."""

    @patch("python_pkg.geo_data.shutil.rmtree")
    @patch("python_pkg.geo_data.CACHE_DIR")
    @patch("python_pkg.geo_data.sys.stdout")
    def test_cache_exists(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_rmtree: MagicMock,
    ) -> None:
        """An existing cache directory is handed to ``shutil.rmtree``."""
        mock_cache_dir.exists.return_value = True
        clear_cache()
        mock_rmtree.assert_called_once_with(mock_cache_dir)

    # rmtree is patched here too so the test can assert the "nothing to
    # delete" path; without it the test only checked that no exception
    # escaped.
    @patch("python_pkg.geo_data.shutil.rmtree")
    @patch("python_pkg.geo_data.CACHE_DIR")
    @patch("python_pkg.geo_data.sys.stdout")
    def test_cache_not_exists(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_rmtree: MagicMock,
    ) -> None:
        """A missing cache directory leaves ``shutil.rmtree`` uncalled."""
        mock_cache_dir.exists.return_value = False
        clear_cache()
        mock_rmtree.assert_not_called()

View File

@ -1,313 +0,0 @@
"""Tests for python_pkg.geo_data._poland_admin module."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import geopandas as gpd
from shapely.geometry import Polygon
from python_pkg.geo_data._poland_admin import (
_get_powiaty_population,
_query_wikidata,
get_poland_boundary,
get_polish_gminy,
get_polish_powiaty,
get_polish_wojewodztwa,
)
class TestQueryWikidata:
    """Checks for ``_query_wikidata``."""

    @patch("python_pkg.geo_data._poland_admin.requests.get")
    def test_successful_query(self, mock_get: MagicMock) -> None:
        """The ``results.bindings`` list is returned after a status check."""
        bindings = [{"name": {"value": "test"}}]
        reply = MagicMock()
        reply.json.return_value = {"results": {"bindings": bindings}}
        mock_get.return_value = reply

        rows = _query_wikidata("SELECT ?x WHERE {}")

        assert rows == [{"name": {"value": "test"}}]
        reply.raise_for_status.assert_called_once()
class TestGetPowiatyPopulation:
    """Checks for ``_get_powiaty_population``."""

    @staticmethod
    def _cache_file(cache_dir: MagicMock, *, present: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` yield a fake file and return it."""
        fake_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=fake_file)
        fake_file.exists.return_value = present
        return fake_file

    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock) -> None:
        """An existing cache file is parsed and returned."""
        fake_file = self._cache_file(mock_cache_dir, present=True)
        fake_file.read_text.return_value = json.dumps({"Kraków": 780000})
        assert _get_powiaty_population() == {"Kraków": 780000}

    @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_admin._query_wikidata")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_admin.sys.stdout")
    def test_downloads_and_caches(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """Without a cache file the query result is stored and returned."""
        fake_file = self._cache_file(mock_cache_dir, present=False)
        well_formed = {
            "powiatLabel": {"value": "powiat krakowski"},
            "population": {"value": "100000"},
        }
        non_numeric_population = {
            "powiatLabel": {"value": "powiat wadowicki"},
            "population": {"value": "bad_value"},
        }
        blank_label = {
            "powiatLabel": {"value": ""},
            "population": {"value": "50000"},
        }
        missing_label = {"population": {"value": "30000"}}
        mock_query.return_value = [
            well_formed,
            non_numeric_population,
            blank_label,
            missing_label,
        ]

        population = _get_powiaty_population()

        assert "krakowski" in population
        fake_file.write_text.assert_called_once()

    @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_admin._query_wikidata")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_admin.sys.stdout")
    def test_empty_label_skipped(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """A binding whose label is an empty string yields no entry."""
        self._cache_file(mock_cache_dir, present=False)
        mock_query.return_value = [
            {"powiatLabel": {"value": ""}, "population": {"value": "1000"}},
        ]
        assert len(_get_powiaty_population()) == 0
class TestGetPolishWojewodztwa:
    """Checks for ``get_polish_wojewodztwa``."""

    @patch("python_pkg.geo_data._poland_admin._download_github_geojson")
    def test_returns_geodataframe(self, mock_download: MagicMock) -> None:
        """The frame produced by the download helper is returned as-is."""
        downloaded = MagicMock(spec=gpd.GeoDataFrame)
        mock_download.return_value = downloaded
        assert get_polish_wojewodztwa() is downloaded
class TestGetPolishPowiaty:
    """Checks for ``get_polish_powiaty``."""

    @patch("python_pkg.geo_data._poland_admin._get_powiaty_population")
    @patch("python_pkg.geo_data._poland_admin._download_github_geojson")
    def test_with_population(
        self, mock_download: MagicMock, mock_pop: MagicMock
    ) -> None:
        """Population figures are attached to the matching powiat rows."""
        labels = ["powiat krakowski", "powiat Wadowice", "powiat xyz", ""]
        unit_square = [(0, 0), (1, 0), (1, 1), (0, 1)]
        mock_download.return_value = gpd.GeoDataFrame(
            {"nazwa": labels},
            geometry=[Polygon(unit_square) for _ in labels],
            crs="EPSG:4326",
        )
        mock_pop.return_value = {"krakowski": 100000, "wadowice": 50000}

        powiaty = get_polish_powiaty()

        assert "population" in powiaty.columns
        # krakowski matched directly
        assert powiaty.iloc[0]["population"] == 100000
        # Wadowice matched case-insensitively
        assert powiaty.iloc[1]["population"] == 50000
class TestGetPolishGminy:
    """Checks for ``get_polish_gminy``."""

    @staticmethod
    def _square(origin: int) -> Polygon:
        """Build a 1x1 square whose lower-left corner is (origin, origin)."""
        far = origin + 1
        return Polygon(
            [(origin, origin), (far, origin), (far, far), (origin, far)]
        )

    @staticmethod
    def _outer_member(origin: int) -> dict:
        """Build an ``outer`` relation member tracing an open 1x1 square."""
        far = origin + 1
        corners = [(origin, origin), (far, origin), (far, far), (origin, far)]
        return {
            "role": "outer",
            "geometry": [{"lon": lon, "lat": lat} for lon, lat in corners],
        }

    @staticmethod
    def _cache_file(cache_dir: MagicMock, *, present: bool) -> MagicMock:
        """Make ``cache_dir / <anything>`` yield a fake file and return it."""
        fake_file = MagicMock()
        cache_dir.__truediv__ = MagicMock(return_value=fake_file)
        fake_file.exists.return_value = present
        return fake_file

    @patch("python_pkg.geo_data._poland_admin.gpd.read_file")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame that already has ``area_km2`` is returned."""
        self._cache_file(mock_cache_dir, present=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {
                "name": ["A", "B"],
                "area_km2": [200.0, 100.0],
            },
            geometry=[self._square(0), self._square(2)],
            crs="EPSG:4326",
        )

        gminy = get_polish_gminy()

        assert gminy.iloc[0]["area_km2"] == 200.0

    @patch("python_pkg.geo_data._poland_admin.gpd.read_file")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """A cached frame lacking ``area_km2`` still returns its one row."""
        self._cache_file(mock_cache_dir, present=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["A"]},
            geometry=[self._square(0)],
            crs="EPSG:4326",
        )

        gminy = get_polish_gminy()

        assert len(gminy) == 1

    def test_downloads_from_osm(self) -> None:
        """Without a cache file the Overpass result is turned into a frame."""
        overpass_elements = [
            {
                "type": "relation",
                "tags": {"name": "Gmina A"},
                "members": [self._outer_member(0)],
            },
            # Duplicate name - should be skipped
            {
                "type": "relation",
                "tags": {"name": "Gmina A"},
                "members": [self._outer_member(2)],
            },
            # Not a relation - should be skipped
            {"type": "way", "tags": {"name": "Way"}},
            # No name
            {"type": "relation", "tags": {}},
            # No outer rings
            {
                "type": "relation",
                "tags": {"name": "Empty"},
                "members": [],
            },
        ]
        prepared = gpd.GeoDataFrame(
            {"name": ["Gmina A"], "area_km2": [100.0]},
            geometry=[self._square(0)],
            crs="EPSG:4326",
        )
        # NOTE(review): _add_area_column is patched on _common, not on
        # _poland_admin like the other targets - confirm this is where
        # get_polish_gminy looks the name up.
        with (
            patch("python_pkg.geo_data._poland_admin.sys.stdout"),
            patch("python_pkg.geo_data._poland_admin.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_admin._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_admin._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_admin.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch("python_pkg.geo_data._common._add_area_column") as mock_add_area,
        ):
            self._cache_file(mock_cache_dir, present=False)
            mock_query.return_value = {"elements": overpass_elements}
            mock_from_features.return_value = prepared
            mock_add_area.return_value = prepared

            gminy = get_polish_gminy()

            assert len(gminy) == 1
class TestGetPolandBoundary:
    """Tests for get_poland_boundary."""

    @patch("python_pkg.geo_data._poland_admin.gpd.read_file")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """Cache hit: the object produced by the mocked read_file is returned."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cached_gdf = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_gdf
        assert get_poland_boundary() is cached_gdf

    @patch("python_pkg.geo_data._poland_admin.gpd.GeoDataFrame.to_file")
    @patch("python_pkg.geo_data._poland_admin._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_admin.get_polish_wojewodztwa")
    @patch("python_pkg.geo_data._poland_admin.CACHE_DIR")
    def test_dissolves_from_wojewodztwa(
        self,
        mock_cache_dir: MagicMock,
        mock_woj: MagicMock,
        mock_ensure: MagicMock,
        mock_to_file: MagicMock,
    ) -> None:
        """Cache miss: two adjacent voivodeship squares yield a one-row result."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        # Two unit squares sharing the edge x == 1.
        west = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        east = Polygon([(1, 0), (2, 0), (2, 1), (1, 1)])
        mock_woj.return_value = gpd.GeoDataFrame(
            {"name": ["woj1", "woj2"]},
            geometry=[west, east],
            crs="EPSG:4326",
        )
        boundary = get_poland_boundary()
        assert len(boundary) == 1

View File

@ -1,385 +0,0 @@
"""Tests for python_pkg.geo_data._poland_nature module."""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock, patch
import geopandas as gpd
import pytest
from shapely.geometry import Polygon
from python_pkg.geo_data._poland_nature import (
get_polish_mountain_peaks,
get_polish_mountain_ranges,
get_polish_national_parks,
)
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
"""Create a mock OSM relation element."""
members = []
if include_outer:
members.append(
{
"role": "outer",
"geometry": [
{"lon": 0, "lat": 0},
{"lon": 1, "lat": 0},
{"lon": 1, "lat": 1},
{"lon": 0, "lat": 1},
],
}
)
return {"type": "relation", "tags": {"name": name}, "members": members}
class TestGetPolishMountainPeaks:
    """Tests for get_polish_mountain_peaks."""

    @staticmethod
    def _peak_node(**tags: str) -> dict[str, Any]:
        """Build an OSM node element at lon 20.0 / lat 49.0 carrying ``tags``."""
        return {"type": "node", "tags": tags, "lon": 20.0, "lat": 49.0}

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """Cache hit: the first row of the result should carry elevation 2499.0."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Rysy", "Babia Góra"], "elevation": [2499.0, 1725.0]},
            geometry=[
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]),
            ],
            crs="EPSG:4326",
        )
        peaks = get_polish_mountain_peaks()
        assert peaks.iloc[0]["elevation"] == 2499.0

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_peaks(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: peaks are built from a mocked Overpass payload."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        node = self._peak_node
        mock_query.return_value = {
            "elements": [
                node(name="Rysy", ele="2499"),
                # Below threshold
                node(name="LowPeak", ele="100"),
                # Missing ele
                node(name="NoEle"),
                # Duplicate name
                node(name="Rysy", ele="2499"),
                # Not a node
                {"type": "way", "tags": {"name": "Way", "ele": "500"}},
                # No name
                node(ele="500"),
                # Comma in ele
                node(name="Peak2", ele="500,5 m"),
                # Invalid ele
                node(name="BadEle", ele="abc"),
            ]
        }
        mock_from_features.return_value = gpd.GeoDataFrame(
            {"name": ["Rysy", "Peak2"], "elevation": [2499.0, 500.5]},
            geometry=[
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]),
            ],
            crs="EPSG:4326",
        )
        peaks = get_polish_mountain_peaks()
        assert peaks.iloc[0]["elevation"] == 2499.0

    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_no_peaks_raises(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """Cache miss with an empty payload is expected to raise ValueError."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_query.return_value = {"elements": []}
        with pytest.raises(ValueError, match="No mountain peaks found"):
            get_polish_mountain_peaks()
class TestGetPolishMountainRanges:
    """Tests for get_polish_mountain_ranges."""

    @staticmethod
    def _way(name: str, *points: tuple[float, float]) -> dict[str, Any]:
        """Build an OSM way element whose geometry follows ``points`` (lon, lat)."""
        return {
            "type": "way",
            "tags": {"name": name},
            "geometry": [{"lon": lon, "lat": lat} for lon, lat in points],
        }

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
    ) -> None:
        """Cache hit with an ``area_km2`` column: the column is still present."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Tatry"], "area_km2": [100.0]},
            geometry=[poly],
            crs="EPSG:4326",
        )
        mock_read.return_value = mock_gdf
        result = get_polish_mountain_ranges()
        assert "area_km2" in result.columns

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
    ) -> None:
        """Cache hit where the cached frame has no ``area_km2`` column."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Tatry"]},
            geometry=[poly],
            crs="EPSG:4326",
        )
        mock_read.return_value = mock_gdf
        result = get_polish_mountain_ranges()
        # `len(result) >= 0` holds for any sized object and so can never
        # fail; check that the cache was read and a frame came back instead.
        mock_read.assert_called()
        assert isinstance(result, gpd.GeoDataFrame)

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_ranges(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: ranges are built from a mocked Overpass payload."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = False
        mock_query.return_value = {
            "elements": [
                # Relation
                _make_relation_element("Tatry"),
                # Way with enough coords
                self._way("Bieszczady", (0, 0), (1, 0), (1, 1), (0, 1)),
                # Way with auto-close
                self._way("Karkonosze", (0, 0), (1, 0), (1, 1), (0, 0.5)),
                # Way already closed (first == last)
                self._way("Sudety", (2, 2), (3, 2), (3, 3), (2, 2)),
                # Way too few coords
                self._way("Short", (0, 0), (1, 0)),
                # Duplicate
                _make_relation_element("Tatry"),
                # No name
                _make_relation_element(""),
                # Unknown type
                {"type": "node", "tags": {"name": "Ignored"}},
                # Way without geometry
                {"type": "way", "tags": {"name": "NoGeom"}},
                # Relation without outer rings
                _make_relation_element("NoOuter", include_outer=False),
            ]
        }
        poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Tatry", "Bieszczady", "Karkonosze", "Sudety"]},
            geometry=[poly, poly, poly, poly],
            crs="EPSG:4326",
        )
        mock_from_features.return_value = mock_gdf
        result = get_polish_mountain_ranges()
        # `len(result) >= 0` can never fail; check that the download branch
        # queried Overpass and that a frame came back instead.
        mock_query.assert_called()
        assert isinstance(result, gpd.GeoDataFrame)
class TestGetPolishNationalParks:
    """Tests for get_polish_national_parks."""

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit with an ``area_km2`` column: the first row keeps 200.0."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Tatrzański Park Narodowy"], "area_km2": [200.0]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )
        mock_read.return_value = mock_gdf
        result = get_polish_national_parks()
        assert result.iloc[0]["area_km2"] == 200.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit without an ``area_km2`` column: one row is returned."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Tatrzański Park Narodowy"]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )
        mock_read.return_value = mock_gdf
        result = get_polish_national_parks()
        assert len(result) == 1

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_parks(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: parks are built from a mocked Overpass payload."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = False
        mock_query.return_value = {
            "elements": [
                _make_relation_element("Tatrzański Park Narodowy"),
                # Not a national park (missing "Narodowy")
                _make_relation_element("Some Reserve"),
                # Not a relation
                {"type": "way", "tags": {"name": "Park Narodowy X"}},
                # No name
                {"type": "relation", "tags": {}, "members": []},
                # Duplicate
                _make_relation_element("Tatrzański Park Narodowy"),
                # No outer rings
                _make_relation_element("Empty Park Narodowy", include_outer=False),
                # Case insensitive match
                _make_relation_element("park narodowy Biebrzy"),
            ]
        }
        poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Tatrzański Park Narodowy", "park narodowy Biebrzy"]},
            geometry=[poly, poly],
            crs="EPSG:4326",
        )
        mock_from_features.return_value = mock_gdf
        result = get_polish_national_parks()
        # `len(result) >= 0` can never fail; check that the download branch
        # queried Overpass and that a frame came back instead.
        mock_query.assert_called()
        assert isinstance(result, gpd.GeoDataFrame)

View File

@ -1,418 +0,0 @@
"""Tests for forests, nature reserves, and landscape parks download paths."""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock, patch
import geopandas as gpd
from shapely.geometry import Polygon
from python_pkg.geo_data._poland_nature import (
get_polish_forests,
get_polish_landscape_parks,
get_polish_nature_reserves,
)
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
"""Create a mock OSM relation element."""
members = []
if include_outer:
members.append(
{
"role": "outer",
"geometry": [
{"lon": 0, "lat": 0},
{"lon": 1, "lat": 0},
{"lon": 1, "lat": 1},
{"lon": 0, "lat": 1},
],
}
)
return {"type": "relation", "tags": {"name": name}, "members": members}
# Shared polygon used as the geometry of the GeoDataFrame fixtures below.
_POLY = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
class TestGetPolishForests:
    """Tests for get_polish_forests."""

    @staticmethod
    def _way(name: str, *points: tuple[int, int]) -> dict[str, Any]:
        """Build an OSM way element whose geometry follows ``points`` (lon, lat)."""
        return {
            "type": "way",
            "tags": {"name": name},
            "geometry": [{"lon": lon, "lat": lat} for lon, lat in points],
        }

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit with an ``area_km2`` column: the first row keeps 600.0."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Puszcza Białowieska"], "area_km2": [600.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        forests = get_polish_forests()
        assert forests.iloc[0]["area_km2"] == 600.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit without an ``area_km2`` column: one row is returned."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Puszcza Białowieska"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        forests = get_polish_forests()
        assert len(forests) == 1

    def test_downloads_forests(self) -> None:
        """Cache miss: forests are built from a mocked Overpass payload."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            way = self._way
            overpass.return_value = {
                "elements": [
                    # Valid forest with keyword
                    way("Puszcza Białowieska", (0, 0), (1, 0), (1, 1), (0, 1)),
                    # Bory keyword
                    way("Bory Tucholskie", (2, 2), (3, 2), (3, 3), (2, 3)),
                    # No forest keyword -> skip
                    way("Random Wood", (0, 0), (1, 0), (1, 1), (0, 1)),
                    # Duplicate
                    way("Puszcza Białowieska", (0, 0), (1, 0), (1, 1), (0, 1)),
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Geometry extraction fails (too few coords)
                    way("Las Mały", (0, 0)),
                ]
            }
            plain_gdf = gpd.GeoDataFrame(
                {"name": ["Puszcza Białowieska", "Bory Tucholskie"]},
                geometry=[_POLY, _POLY],
                crs="EPSG:4326",
            )
            from_features.return_value = plain_gdf
            # The patched area helper hands back a copy carrying area_km2.
            with_area = plain_gdf.copy()
            with_area["area_km2"] = [600.0, 300.0]
            add_area.return_value = with_area
            forests = get_polish_forests()
            assert len(forests) == 2

    def test_downloads_forests_empty(self) -> None:
        """Cache miss with an empty payload: an empty result is expected."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            overpass.return_value = {"elements": []}
            no_rows = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = no_rows
            add_area.return_value = no_rows
            forests = get_polish_forests()
            assert len(forests) == 0
class TestGetPolishNatureReserves:
    """Tests for get_polish_nature_reserves."""

    @staticmethod
    def _way(name: str, *points: tuple[int, int]) -> dict[str, Any]:
        """Build an OSM way element whose geometry follows ``points`` (lon, lat)."""
        return {
            "type": "way",
            "tags": {"name": name},
            "geometry": [{"lon": lon, "lat": lat} for lon, lat in points],
        }

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit with an ``area_km2`` column: the first row keeps 50.0."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Rezerwat X"], "area_km2": [50.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        reserves = get_polish_nature_reserves()
        assert reserves.iloc[0]["area_km2"] == 50.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit without an ``area_km2`` column: one row is returned."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Rezerwat X"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        reserves = get_polish_nature_reserves()
        assert len(reserves) == 1

    def test_downloads_reserves(self) -> None:
        """Cache miss: reserves are built from a mocked Overpass payload."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            way = self._way
            overpass.return_value = {
                "elements": [
                    way("Rezerwat A", (0, 0), (1, 0), (1, 1), (0, 1)),
                    # Duplicate
                    way("Rezerwat A", (0, 0), (1, 0), (1, 1), (0, 1)),
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Geometry fails
                    way("Tiny", (0, 0)),
                ]
            }
            plain_gdf = gpd.GeoDataFrame(
                {"name": ["Rezerwat A"]},
                geometry=[_POLY],
                crs="EPSG:4326",
            )
            from_features.return_value = plain_gdf
            # The patched area helper hands back a copy carrying area_km2.
            with_area = plain_gdf.copy()
            with_area["area_km2"] = [50.0]
            add_area.return_value = with_area
            reserves = get_polish_nature_reserves()
            assert len(reserves) == 1

    def test_downloads_reserves_empty(self) -> None:
        """Cache miss with an empty payload: an empty result is expected."""
        with (
            patch("python_pkg.geo_data._poland_nature.sys.stdout"),
            patch("python_pkg.geo_data._poland_nature.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_nature._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_nature._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_nature._add_area_column"
            ) as add_area,
        ):
            cache_file = MagicMock()
            cache_file.exists.return_value = False
            cache_dir.__truediv__ = MagicMock(return_value=cache_file)
            overpass.return_value = {"elements": []}
            no_rows = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = no_rows
            add_area.return_value = no_rows
            reserves = get_polish_nature_reserves()
            assert len(reserves) == 0
class TestGetPolishLandscapeParks:
    """Tests for get_polish_landscape_parks."""

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_with_area(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
    ) -> None:
        """Cache hit with an ``area_km2`` column: the first row keeps 100.0."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Park Krajobrazowy X"], "area_km2": [100.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        parks = get_polish_landscape_parks()
        assert parks.iloc[0]["area_km2"] == 100.0

    @patch("python_pkg.geo_data._poland_nature.gpd.read_file")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    def test_cached_without_area(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
    ) -> None:
        """Cache hit without an ``area_km2`` column: one row is returned."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Park Krajobrazowy X"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        parks = get_polish_landscape_parks()
        assert len(parks) == 1

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_landscape_parks(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: parks are built from a mocked Overpass payload."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        osm_elements = [
            _make_relation_element("Park Krajobrazowy A"),
            # Not a relation -> skip
            {
                "type": "way",
                "tags": {"name": "Park Krajobrazowy B"},
                "geometry": [],
            },
            # No name
            {"type": "relation", "tags": {}, "members": []},
            # Duplicate
            _make_relation_element("Park Krajobrazowy A"),
            # No outer rings
            _make_relation_element("Park Empty", include_outer=False),
        ]
        mock_query.return_value = {"elements": osm_elements}
        mock_from_features.return_value = gpd.GeoDataFrame(
            {"name": ["Park Krajobrazowy A"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        parks = get_polish_landscape_parks()
        assert len(parks) == 1

    @patch("python_pkg.geo_data._poland_nature.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_nature._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_nature._overpass_query")
    @patch("python_pkg.geo_data._poland_nature.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_nature.sys.stdout")
    def test_downloads_landscape_parks_empty(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss with an empty payload: an empty result is expected."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_query.return_value = {"elements": []}
        mock_from_features.return_value = gpd.GeoDataFrame(
            {"name": [], "geometry": []}
        )
        parks = get_polish_landscape_parks()
        assert len(parks) == 0

View File

@ -1,466 +0,0 @@
"""Tests for python_pkg.geo_data._poland_water module."""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock, patch
import geopandas as gpd
from shapely.geometry import Polygon
from python_pkg.geo_data._poland_water import (
_extract_coastal_geometry,
_extract_river_coords_from_element,
get_polish_lakes,
get_polish_rivers,
)
class TestExtractCoastalGeometry:
    """Tests for _extract_coastal_geometry."""

    # Tuple passed as the third positional argument in every call below.
    _TYPES = ("cliff", "beach")

    @staticmethod
    def _way(*points: tuple[float, float]) -> dict[str, Any]:
        """Build an OSM way element whose geometry follows ``points`` (lon, lat)."""
        return {
            "type": "way",
            "geometry": [{"lon": lon, "lat": lat} for lon, lat in points],
        }

    def test_relation_delegated(self) -> None:
        """A relation with one outer square ring yields a non-None result."""
        element: dict[str, Any] = {
            "type": "relation",
            "members": [
                {
                    "role": "outer",
                    "geometry": [
                        {"lon": 0, "lat": 0},
                        {"lon": 1, "lat": 0},
                        {"lon": 1, "lat": 1},
                        {"lon": 0, "lat": 1},
                    ],
                }
            ],
        }
        geometry = _extract_coastal_geometry(element, "peninsula", self._TYPES)
        assert geometry is not None

    def test_way_line_type(self) -> None:
        """A two-point way requested as "cliff" comes back as a LineString."""
        element: dict[str, Any] = self._way((0, 0), (1, 1))
        geometry = _extract_coastal_geometry(element, "cliff", self._TYPES)
        assert geometry is not None
        assert geometry["type"] == "LineString"

    def test_way_polygon_type(self) -> None:
        """A four-point way requested as "peninsula" comes back as a Polygon."""
        element: dict[str, Any] = self._way((0, 0), (1, 0), (1, 1), (0, 1))
        geometry = _extract_coastal_geometry(element, "peninsula", self._TYPES)
        assert geometry is not None
        assert geometry["type"] == "Polygon"

    def test_way_polygon_auto_close(self) -> None:
        """An open four-point ring comes back with first == last coordinate."""
        element: dict[str, Any] = self._way((0, 0), (1, 0), (1, 1), (0, 0.5))
        geometry = _extract_coastal_geometry(element, "peninsula", self._TYPES)
        assert geometry is not None
        ring = geometry["coordinates"][0]
        assert ring[0] == ring[-1]

    def test_way_polygon_already_closed(self) -> None:
        """A ring whose last point repeats the first keeps its four coords."""
        element: dict[str, Any] = self._way((0, 0), (1, 0), (1, 1), (0, 0))
        geometry = _extract_coastal_geometry(element, "peninsula", self._TYPES)
        assert geometry is not None
        assert geometry["type"] == "Polygon"
        assert len(geometry["coordinates"][0]) == 4

    def test_way_too_short_for_polygon_not_line(self) -> None:
        """Three points requested as "peninsula" produce no geometry."""
        element: dict[str, Any] = self._way((0, 0), (1, 0), (1, 1))
        # 3 coords, >= MIN_LINE_COORDS but < MIN_RING_COORDS for polygon
        geometry = _extract_coastal_geometry(element, "peninsula", self._TYPES)
        # 3 coords is not enough for ring (need 4), so returns None
        assert geometry is None

    def test_way_too_few_coords(self) -> None:
        """A single-point way produces no geometry."""
        element: dict[str, Any] = self._way((0, 0))
        assert _extract_coastal_geometry(element, "cliff", self._TYPES) is None

    def test_not_way_or_relation(self) -> None:
        """A node element produces no geometry."""
        element: dict[str, Any] = {"type": "node"}
        assert _extract_coastal_geometry(element, "cliff", self._TYPES) is None

    def test_way_no_geometry(self) -> None:
        """A way without a ``geometry`` key produces no geometry."""
        element: dict[str, Any] = {"type": "way"}
        assert _extract_coastal_geometry(element, "cliff", self._TYPES) is None
class TestExtractRiverCoordsFromElement:
    """Tests for _extract_river_coords_from_element."""

    def test_way_element(self) -> None:
        """A two-point way yields exactly one entry."""
        element: dict[str, Any] = {
            "type": "way",
            "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
        }
        extracted = _extract_river_coords_from_element(element)
        assert len(extracted) == 1

    def test_way_too_few_coords(self) -> None:
        """A single-point way yields nothing."""
        element: dict[str, Any] = {
            "type": "way",
            "geometry": [{"lon": 0, "lat": 0}],
        }
        extracted = _extract_river_coords_from_element(element)
        assert len(extracted) == 0

    def test_relation_element(self) -> None:
        """Only the two usable way members of a relation are counted."""
        usable_members = [
            {
                "type": "way",
                "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
            },
            {
                "type": "way",
                "geometry": [{"lon": 1, "lat": 1}, {"lon": 2, "lat": 2}],
            },
        ]
        rejected_members = [
            # Too few coords
            {
                "type": "way",
                "geometry": [{"lon": 0, "lat": 0}],
            },
            # Not a way
            {
                "type": "node",
                "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
            },
            # No geometry
            {"type": "way"},
        ]
        element: dict[str, Any] = {
            "type": "relation",
            "members": [*usable_members, *rejected_members],
        }
        extracted = _extract_river_coords_from_element(element)
        assert len(extracted) == 2

    def test_unknown_type(self) -> None:
        """A node element yields nothing."""
        element: dict[str, Any] = {"type": "node"}
        extracted = _extract_river_coords_from_element(element)
        assert len(extracted) == 0

    def test_way_no_geometry(self) -> None:
        """A way without a ``geometry`` key yields nothing."""
        element: dict[str, Any] = {"type": "way"}
        extracted = _extract_river_coords_from_element(element)
        assert len(extracted) == 0
class TestGetPolishLakes:
    """Tests for get_polish_lakes."""

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit with an ``area_km2`` column: the first row keeps 113.0."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Śniardwy"], "area_km2": [113.0]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )
        mock_read.return_value = mock_gdf
        result = get_polish_lakes()
        assert result.iloc[0]["area_km2"] == 113.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cache hit without an ``area_km2`` column: one row is returned."""
        mock_path = MagicMock()
        mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
        mock_path.exists.return_value = True
        mock_gdf = gpd.GeoDataFrame(
            {"name": ["Śniardwy"]},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
            crs="EPSG:4326",
        )
        mock_read.return_value = mock_gdf
        result = get_polish_lakes()
        assert len(result) == 1

    def test_downloads_lakes(self) -> None:
        """Cache miss: lakes are built from a mocked Overpass payload."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_area_column"
            ) as mock_add_area,
        ):
            mock_path = MagicMock()
            mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
            mock_path.exists.return_value = False
            mock_query.return_value = {
                "elements": [
                    {
                        "type": "way",
                        "tags": {"name": "Śniardwy"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 0},
                            {"lon": 1, "lat": 1},
                            {"lon": 0, "lat": 1},
                        ],
                    },
                    # Duplicate
                    {
                        "type": "way",
                        "tags": {"name": "Śniardwy"},
                        "geometry": [
                            {"lon": 0, "lat": 0},
                            {"lon": 1, "lat": 0},
                            {"lon": 1, "lat": 1},
                            {"lon": 0, "lat": 1},
                        ],
                    },
                    # No name
                    {"type": "way", "tags": {}, "geometry": []},
                    # Geometry extraction fails
                    {
                        "type": "way",
                        "tags": {"name": "Tiny"},
                        "geometry": [{"lon": 0, "lat": 0}],
                    },
                ]
            }
            poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
            mock_gdf = gpd.GeoDataFrame(
                {"name": ["Śniardwy"]},
                geometry=[poly],
                crs="EPSG:4326",
            )
            mock_from_features.return_value = mock_gdf
            gdf_with_area = mock_gdf.copy()
            gdf_with_area["area_km2"] = [113.0]
            mock_add_area.return_value = gdf_with_area
            result = get_polish_lakes()
            # `len(result) >= 0` can never fail; check that the download
            # branch queried Overpass and that a frame came back instead.
            mock_query.assert_called()
            assert isinstance(result, gpd.GeoDataFrame)

    def test_empty_result(self) -> None:
        """Cache miss with an empty payload: an empty result is expected."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as mock_from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_area_column"
            ) as mock_add_area,
        ):
            mock_path = MagicMock()
            mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
            mock_path.exists.return_value = False
            mock_query.return_value = {"elements": []}
            empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []})
            mock_from_features.return_value = empty_gdf
            mock_add_area.return_value = empty_gdf
            result = get_polish_lakes()
            assert len(result) == 0
class TestGetPolishRivers:
"""Tests for get_polish_rivers."""
@patch("python_pkg.geo_data._poland_water.gpd.read_file")
@patch("python_pkg.geo_data._poland_water.CACHE_DIR")
def test_cached_with_length(
    self, mock_cache_dir: MagicMock, mock_read: MagicMock
) -> None:
    """Cache hit with a ``length_km`` column: the first row keeps 1047.0."""
    cache_file = MagicMock()
    cache_file.exists.return_value = True
    mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
    mock_read.return_value = gpd.GeoDataFrame(
        {"name": ["Wisła"], "length_km": [1047.0]},
        geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
        crs="EPSG:4326",
    )
    rivers = get_polish_rivers()
    assert rivers.iloc[0]["length_km"] == 1047.0
@patch("python_pkg.geo_data._poland_water.gpd.read_file")
@patch("python_pkg.geo_data._poland_water.CACHE_DIR")
def test_cached_without_length(
    self, mock_cache_dir: MagicMock, mock_read: MagicMock
) -> None:
    """Cache hit without a ``length_km`` column: one row is returned."""
    cache_file = MagicMock()
    cache_file.exists.return_value = True
    mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
    mock_read.return_value = gpd.GeoDataFrame(
        {"name": ["Wisła"]},
        geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])],
        crs="EPSG:4326",
    )
    rivers = get_polish_rivers()
    assert len(rivers) == 1
def test_downloads_rivers(self) -> None:
with (
patch("python_pkg.geo_data._poland_water.sys.stdout"),
patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
patch(
"python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
) as mock_from_features,
patch(
"python_pkg.geo_data._poland_water._add_length_column"
) as mock_add_length,
):
mock_path = MagicMock()
mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
mock_path.exists.return_value = False
mock_query.return_value = {
"elements": [
# Way with wikidata
{
"type": "way",
"id": 1,
"tags": {"name": "Wisła", "wikidata": "Q54"},
"geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
},
# Way without wikidata
{
"type": "way",
"id": 2,
"tags": {"name": "Odra"},
"geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
},
# Relation
{
"type": "relation",
"id": 3,
"tags": {"name": "Bug", "wikidata": "Q55"},
"members": [
{
"type": "way",
"geometry": [
{"lon": 0, "lat": 0},
{"lon": 1, "lat": 1},
],
},
{
"type": "way",
"geometry": [
{"lon": 1, "lat": 1},
{"lon": 2, "lat": 2},
],
},
],
},
# No name
{
"type": "way",
"id": 4,
"tags": {},
"geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
},
# Way with no coords
{
"type": "way",
"id": 5,
"tags": {"name": "Short"},
"geometry": [{"lon": 0, "lat": 0}],
},
]
}
poly = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
mock_gdf = gpd.GeoDataFrame(
{"name": ["Wisła", "Odra", "Bug"]},
geometry=[poly, poly, poly],
crs="EPSG:4326",
)
mock_from_features.return_value = mock_gdf
gdf_with_length = mock_gdf.copy()
gdf_with_length["length_km"] = [1047.0, 854.0, 772.0]
mock_add_length.return_value = gdf_with_length
result = get_polish_rivers()
assert len(result) >= 0
def test_empty_result(self) -> None:
with (
patch("python_pkg.geo_data._poland_water.sys.stdout"),
patch("python_pkg.geo_data._poland_water.CACHE_DIR") as mock_cache_dir,
patch("python_pkg.geo_data._poland_water._overpass_query") as mock_query,
patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
patch(
"python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
) as mock_from_features,
patch(
"python_pkg.geo_data._poland_water._add_length_column"
) as mock_add_length,
):
mock_path = MagicMock()
mock_cache_dir.__truediv__ = MagicMock(return_value=mock_path)
mock_path.exists.return_value = False
mock_query.return_value = {"elements": []}
empty_gdf = gpd.GeoDataFrame({"name": [], "geometry": []})
mock_from_features.return_value = empty_gdf
mock_add_length.return_value = empty_gdf
result = get_polish_rivers()
assert len(result) == 0

View File

@ -1,397 +0,0 @@
"""Tests for islands, coastal features, and UNESCO sites download paths."""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock, patch
import geopandas as gpd
from shapely.geometry import Polygon
from python_pkg.geo_data._poland_water import (
get_polish_coastal_features,
get_polish_islands,
get_polish_unesco_sites,
)
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
"""Create a mock OSM relation element."""
members = []
if include_outer:
members.append(
{
"role": "outer",
"geometry": [
{"lon": 0, "lat": 0},
{"lon": 1, "lat": 0},
{"lon": 1, "lat": 1},
{"lon": 0, "lat": 1},
],
}
)
return {"type": "relation", "tags": {"name": name}, "members": members}
# Shared square polygon (x 20-21, y 50-51) reused as the geometry of test frames.
_POLY = Polygon([(20, 50), (21, 50), (21, 51), (20, 51)])
class TestGetPolishIslands:
    """Tests for get_polish_islands."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cached frame that already has ``area_km2`` keeps that value."""
        self._stub_cache(mock_cache_dir, cached=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Wolin"], "area_km2": [265.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        islands = get_polish_islands()
        assert islands.iloc[0]["area_km2"] == 265.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_area(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cached frame lacking ``area_km2`` still comes back with one row."""
        self._stub_cache(mock_cache_dir, cached=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Wolin"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        islands = get_polish_islands()
        assert len(islands) == 1

    def test_downloads_islands(self) -> None:
        """Cache miss with valid, duplicate, unnamed and single-point elements."""

        def square() -> list[dict[str, int]]:
            # Fresh objects on every call so no two elements share a list.
            return [
                {"lon": 0, "lat": 0},
                {"lon": 1, "lat": 0},
                {"lon": 1, "lat": 1},
                {"lon": 0, "lat": 1},
            ]

        payload = {
            "elements": [
                {"type": "way", "tags": {"name": "Wolin"}, "geometry": square()},
                # Same name a second time
                {"type": "way", "tags": {"name": "Wolin"}, "geometry": square()},
                # Element without a name
                {"type": "way", "tags": {}, "geometry": []},
                # Named element with a single-point geometry
                {
                    "type": "way",
                    "tags": {"name": "Tiny"},
                    "geometry": [{"lon": 0, "lat": 0}],
                },
            ]
        }
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch("python_pkg.geo_data._poland_water._add_area_column") as add_area,
        ):
            self._stub_cache(cache_dir, cached=False)
            overpass.return_value = payload
            raw = gpd.GeoDataFrame(
                {"name": ["Wolin"]},
                geometry=[_POLY],
                crs="EPSG:4326",
            )
            from_features.return_value = raw
            with_area = raw.copy()
            with_area["area_km2"] = [265.0]
            add_area.return_value = with_area
            islands = get_polish_islands()
            assert len(islands) == 1

    def test_downloads_islands_empty(self) -> None:
        """Cache miss with no Overpass elements yields an empty frame."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch("python_pkg.geo_data._poland_water._add_area_column") as add_area,
        ):
            self._stub_cache(cache_dir, cached=False)
            overpass.return_value = {"elements": []}
            nothing = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = nothing
            add_area.return_value = nothing
            islands = get_polish_islands()
            assert len(islands) == 0
class TestGetPolishCoastalFeatures:
    """Tests for get_polish_coastal_features."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_with_length(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cached frame that already has ``length_km`` keeps that value."""
        self._stub_cache(mock_cache_dir, cached=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Mierzeja Helska"], "length_km": [35.0]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        features = get_polish_coastal_features()
        assert features.iloc[0]["length_km"] == 35.0

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached_without_length(
        self, mock_cache_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """Cached frame lacking ``length_km`` still comes back with one row."""
        self._stub_cache(mock_cache_dir, cached=True)
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": ["Mierzeja Helska"]},
            geometry=[_POLY],
            crs="EPSG:4326",
        )
        features = get_polish_coastal_features()
        assert len(features) == 1

    def test_downloads_coastal_features(self) -> None:
        """Cache miss with a peninsula, a cliff, a duplicate and two bad rows."""

        def square() -> list[dict[str, int]]:
            # Fresh objects on every call so no two elements share a list.
            return [
                {"lon": 0, "lat": 0},
                {"lon": 1, "lat": 0},
                {"lon": 1, "lat": 1},
                {"lon": 0, "lat": 1},
            ]

        payload = {
            "elements": [
                # Peninsula with a four-point geometry
                {
                    "type": "way",
                    "tags": {"name": "Hel", "natural": "peninsula"},
                    "geometry": square(),
                },
                # Cliff with a two-point geometry
                {
                    "type": "way",
                    "tags": {"name": "Klif Orłowski", "natural": "cliff"},
                    "geometry": [{"lon": 0, "lat": 0}, {"lon": 1, "lat": 1}],
                },
                # Same peninsula a second time
                {
                    "type": "way",
                    "tags": {"name": "Hel", "natural": "peninsula"},
                    "geometry": square(),
                },
                # Element without a name
                {"type": "way", "tags": {"natural": "cliff"}, "geometry": []},
                # Node that has no geometry key at all
                {"type": "node", "tags": {"name": "X", "natural": "cliff"}},
            ]
        }
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_length_column"
            ) as add_length,
        ):
            self._stub_cache(cache_dir, cached=False)
            overpass.return_value = payload
            raw = gpd.GeoDataFrame(
                {"name": ["Hel", "Klif Orłowski"]},
                geometry=[_POLY, _POLY],
                crs="EPSG:4326",
            )
            from_features.return_value = raw
            with_length = raw.copy()
            with_length["length_km"] = [35.0, 5.0]
            add_length.return_value = with_length
            features = get_polish_coastal_features()
            assert len(features) == 2

    def test_downloads_coastal_features_empty(self) -> None:
        """Cache miss with no Overpass elements yields an empty frame."""
        with (
            patch("python_pkg.geo_data._poland_water.sys.stdout"),
            patch("python_pkg.geo_data._poland_water.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._poland_water._overpass_query") as overpass,
            patch("python_pkg.geo_data._poland_water._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._poland_water._add_length_column"
            ) as add_length,
        ):
            self._stub_cache(cache_dir, cached=False)
            overpass.return_value = {"elements": []}
            nothing = gpd.GeoDataFrame({"name": [], "geometry": []})
            from_features.return_value = nothing
            add_length.return_value = nothing
            features = get_polish_coastal_features()
            assert len(features) == 0
class TestGetPolishUnescoSites:
    """Tests for get_polish_unesco_sites."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._poland_water.gpd.read_file")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With the cache file present, the frame from ``read_file`` is returned."""
        self._stub_cache(mock_cache_dir, cached=True)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame
        assert get_polish_unesco_sites() is cached_frame

    @patch("python_pkg.geo_data._poland_water.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._poland_water._ensure_cache_dir")
    @patch("python_pkg.geo_data._poland_water._overpass_query")
    @patch("python_pkg.geo_data._poland_water.CACHE_DIR")
    @patch("python_pkg.geo_data._poland_water.sys.stdout")
    def test_downloads_unesco_sites(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: nodes, ways and relations, including rows expected to be skipped."""
        self._stub_cache(mock_cache_dir, cached=False)
        elements: list[dict[str, Any]] = [
            # Node
            {
                "type": "node",
                "tags": {"name": "Kopalnia Soli Wieliczka"},
                "lon": 20.0,
                "lat": 50.0,
            },
            # Relation with an outer ring
            _make_relation_element("Stare Miasto w Krakowie"),
            # Way with four distinct points
            {
                "type": "way",
                "tags": {"name": "Auschwitz"},
                "geometry": [
                    {"lon": 19, "lat": 50},
                    {"lon": 19.1, "lat": 50},
                    {"lon": 19.1, "lat": 50.1},
                    {"lon": 19, "lat": 50.1},
                ],
            },
            # Way whose last point repeats the first
            {
                "type": "way",
                "tags": {"name": "Zamość"},
                "geometry": [
                    {"lon": 23, "lat": 50.7},
                    {"lon": 23.1, "lat": 50.7},
                    {"lon": 23.1, "lat": 50.8},
                    {"lon": 23, "lat": 50.7},
                ],
            },
            # Way with only two points
            {
                "type": "way",
                "tags": {"name": "TooShort"},
                "geometry": [
                    {"lon": 19, "lat": 50},
                    {"lon": 19.1, "lat": 50},
                ],
            },
            # Repeat of the first node
            {
                "type": "node",
                "tags": {"name": "Kopalnia Soli Wieliczka"},
                "lon": 20.0,
                "lat": 50.0,
            },
            # Node without a name
            {"type": "node", "tags": {}, "lon": 0, "lat": 0},
            # Element type other than node/way/relation
            {"type": "area", "tags": {"name": "Ignored"}},
            # Relation with no members
            _make_relation_element("NoOuter", include_outer=False),
            # Way lacking a geometry key
            {"type": "way", "tags": {"name": "NoGeom"}},
        ]
        mock_query.return_value = {"elements": elements}
        built_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_frame
        assert get_polish_unesco_sites() is built_frame

View File

@ -1,424 +0,0 @@
"""Tests for python_pkg.geo_data._warsaw module."""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock, patch
import geopandas as gpd
from shapely.geometry import LineString, Polygon
from python_pkg.geo_data._warsaw import (
_merge_bridge_segments,
get_vistula_river,
get_warsaw_boundary,
get_warsaw_bridges,
get_warsaw_districts,
)
class TestGetWarsawBoundary:
    """Tests for get_warsaw_boundary."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @staticmethod
    def _stub_districts_file(mock_pkg_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``_PKG_DIR / a / b / c`` give a fake path with the given ``exists()``."""
        districts_file = MagicMock()
        districts_file.exists.return_value = exists
        # Three chained "/" operations lead from the package dir to the file.
        second_level = MagicMock()
        second_level.__truediv__ = MagicMock(return_value=districts_file)
        first_level = MagicMock()
        first_level.__truediv__ = MagicMock(return_value=second_level)
        mock_pkg_dir.__truediv__ = MagicMock(return_value=first_level)
        return districts_file

    @staticmethod
    def _two_polygon_frame(names: list[str]) -> gpd.GeoDataFrame:
        """Return a two-row frame: a large square and a small square inside it."""
        return gpd.GeoDataFrame(
            {"name": names},
            geometry=[
                Polygon([(20, 52), (21, 52), (21, 53), (20, 53)]),
                Polygon([(20.5, 52.5), (20.6, 52.5), (20.6, 52.6), (20.5, 52.6)]),
            ],
            crs="EPSG:4326",
        )

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With the cache file present, the frame from ``read_file`` is returned."""
        self._stub_cache(mock_cache_dir, cached=True)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame
        assert get_warsaw_boundary() is cached_frame

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.to_file")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_from_districts_file_with_warszawa(
        self,
        mock_cache_dir: MagicMock,
        mock_pkg_dir: MagicMock,
        mock_read: MagicMock,
        mock_ensure: MagicMock,
        mock_to_file: MagicMock,
    ) -> None:
        """No cache, districts file present and containing a "Warszawa" row."""
        self._stub_cache(mock_cache_dir, cached=False)
        self._stub_districts_file(mock_pkg_dir, exists=True)
        mock_read.return_value = self._two_polygon_frame(["Warszawa", "Mokotów"])
        boundary = get_warsaw_boundary()
        assert len(boundary) == 1

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.to_file")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_from_districts_file_no_warszawa_entry(
        self,
        mock_cache_dir: MagicMock,
        mock_pkg_dir: MagicMock,
        mock_read: MagicMock,
        mock_ensure: MagicMock,
        mock_to_file: MagicMock,
    ) -> None:
        """No cache, districts file present but without any "Warszawa" row."""
        self._stub_cache(mock_cache_dir, cached=False)
        self._stub_districts_file(mock_pkg_dir, exists=True)
        mock_read.return_value = self._two_polygon_frame(["Mokotów", "Śródmieście"])
        boundary = get_warsaw_boundary()
        assert len(boundary) == 1

    def test_fallback_overpass(self) -> None:
        """Neither cache nor districts file exists, so Overpass data is used."""
        payload = {
            "elements": [
                {
                    "type": "relation",
                    "members": [
                        {
                            "role": "outer",
                            "geometry": [
                                {"lon": 20, "lat": 52},
                                {"lon": 21, "lat": 52},
                                {"lon": 21, "lat": 53},
                            ],
                        },
                        # Member whose role is not "outer"
                        {
                            "role": "inner",
                            "geometry": [
                                {"lon": 20.5, "lat": 52.5},
                            ],
                        },
                    ],
                },
                # Element that is not a relation
                {"type": "way"},
                # Relation that has only an inner member
                {
                    "type": "relation",
                    "members": [
                        {"role": "inner", "geometry": [{"lon": 20, "lat": 52}]},
                    ],
                },
            ]
        }
        with (
            patch("python_pkg.geo_data._warsaw.sys.stdout"),
            patch("python_pkg.geo_data._warsaw.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._warsaw._PKG_DIR") as pkg_dir,
            patch("python_pkg.geo_data._warsaw._overpass_query") as overpass,
            patch("python_pkg.geo_data._warsaw._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features"
            ) as from_features,
        ):
            self._stub_cache(cache_dir, cached=False)
            self._stub_districts_file(pkg_dir, exists=False)
            overpass.return_value = payload
            built_frame = MagicMock(spec=gpd.GeoDataFrame)
            from_features.return_value = built_frame
            assert get_warsaw_boundary() is built_frame
class TestGetWarsawDistricts:
    """Tests for get_warsaw_districts."""

    @staticmethod
    def _stub_districts_file(mock_pkg_dir: MagicMock, *, exists: bool) -> MagicMock:
        """Make ``_PKG_DIR / a / b / c`` give a fake path with the given ``exists()``."""
        districts_file = MagicMock()
        districts_file.exists.return_value = exists
        # Three chained "/" operations lead from the package dir to the file.
        second_level = MagicMock()
        second_level.__truediv__ = MagicMock(return_value=districts_file)
        first_level = MagicMock()
        first_level.__truediv__ = MagicMock(return_value=second_level)
        mock_pkg_dir.__truediv__ = MagicMock(return_value=first_level)
        return districts_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    def test_districts_file_exists(
        self, mock_pkg_dir: MagicMock, mock_read: MagicMock
    ) -> None:
        """The "Warszawa" row is absent from the returned districts."""
        self._stub_districts_file(mock_pkg_dir, exists=True)
        names = ["Warszawa", "Mokotów", "Śródmieście"]
        mock_read.return_value = gpd.GeoDataFrame(
            {"name": names},
            geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) for _ in names],
            crs="EPSG:4326",
        )
        districts = get_warsaw_districts()
        assert "Warszawa" not in districts["name"].values

    @patch("python_pkg.geo_data._warsaw._PKG_DIR")
    def test_districts_file_not_found(self, mock_pkg_dir: MagicMock) -> None:
        """A missing districts file raises ``FileNotFoundError``."""
        self._stub_districts_file(mock_pkg_dir, exists=False)
        import pytest

        with pytest.raises(FileNotFoundError, match="Warsaw districts GeoJSON"):
            get_warsaw_districts()
class TestGetVistulaRiver:
    """Tests for get_vistula_river."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With the cache file present, the frame from ``read_file`` is returned."""
        self._stub_cache(mock_cache_dir, cached=True)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame
        assert get_vistula_river() is cached_frame

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw._overpass_query")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw.sys.stdout")
    def test_downloads(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: one usable way plus three elements of other shapes."""
        self._stub_cache(mock_cache_dir, cached=False)
        elements: list[dict[str, Any]] = [
            {
                "type": "way",
                "geometry": [
                    {"lon": 20.0, "lat": 52.0},
                    {"lon": 21.0, "lat": 52.5},
                ],
            },
            # Way with a single point
            {"type": "way", "geometry": [{"lon": 20.0, "lat": 52.0}]},
            # Element that is not a way
            {"type": "node"},
            # Way lacking a geometry key
            {"type": "way"},
        ]
        mock_query.return_value = {"elements": elements}
        built_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_frame
        assert get_vistula_river() is built_frame
class TestGetWarsawBridges:
    """Tests for get_warsaw_bridges."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With the cache file present, the frame from ``read_file`` is returned."""
        self._stub_cache(mock_cache_dir, cached=True)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame
        assert get_warsaw_bridges() is cached_frame

    def test_downloads(self) -> None:
        """Cache miss: bridge candidates near and far from a real river line."""

        def crossing() -> list[dict[str, float]]:
            # Short north-south segment straddling lat 52.2; fresh list per call.
            return [{"lon": 20.5, "lat": 52.19}, {"lon": 20.5, "lat": 52.21}]

        payload = {
            "elements": [
                # Segment crossing the river line at lat 52.2
                {
                    "type": "way",
                    "id": 1,
                    "tags": {"name": "Most Łazienkowski"},
                    "geometry": crossing(),
                },
                # Segment at lat 55, well away from the river line
                {
                    "type": "way",
                    "id": 2,
                    "tags": {"name": "Most Daleki"},
                    "geometry": [
                        {"lon": 20.5, "lat": 55.0},
                        {"lon": 20.5, "lat": 55.1},
                    ],
                },
                # Element that is not a way
                {"type": "node", "tags": {"name": "Most X"}},
                # Way lacking a geometry key
                {"type": "way", "tags": {"name": "Most Y"}},
                # Way without a name
                {"type": "way", "id": 3, "tags": {}, "geometry": crossing()},
                # Same name as the first bridge
                {
                    "type": "way",
                    "id": 4,
                    "tags": {"name": "Most Łazienkowski"},
                    "geometry": crossing(),
                },
                # Way with a single point
                {
                    "type": "way",
                    "id": 5,
                    "tags": {"name": "Most Short"},
                    "geometry": [{"lon": 20.5, "lat": 52.19}],
                },
            ]
        }
        with (
            patch("python_pkg.geo_data._warsaw.sys.stdout"),
            patch("python_pkg.geo_data._warsaw.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._warsaw.get_vistula_river") as vistula,
            patch("python_pkg.geo_data._warsaw._overpass_query") as overpass,
            patch("python_pkg.geo_data._warsaw._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features"
            ) as from_features,
        ):
            self._stub_cache(cache_dir, cached=False)
            # A real (not mocked) river geometry, built for the intersection checks.
            vistula.return_value = gpd.GeoDataFrame(
                {"name": ["Wisła"]},
                geometry=[LineString([(20.0, 52.2), (21.0, 52.2)])],
                crs="EPSG:4326",
            )
            overpass.return_value = payload
            built_frame = MagicMock(spec=gpd.GeoDataFrame)
            from_features.return_value = built_frame
            assert get_warsaw_bridges() is built_frame
class TestMergeBridgeSegments:
    """Tests for _merge_bridge_segments."""

    @staticmethod
    def _segment(start: tuple[int, int], end: tuple[int, int]) -> dict[str, Any]:
        """Build one "Most A" feature whose geometry runs from start to end."""
        return {
            "properties": {"name": "Most A"},
            "geometry": {"coordinates": [start, end]},
        }

    def test_single_segment(self) -> None:
        """One feature for a name comes back as a single ``LineString``."""
        features: list[dict[str, Any]] = [self._segment((20, 52), (21, 52))]
        merged = _merge_bridge_segments(features)
        assert len(merged) == 1
        assert merged[0]["geometry"]["type"] == "LineString"

    def test_multiple_segments_same_name(self) -> None:
        """Two features sharing a name collapse into one ``MultiLineString``."""
        features: list[dict[str, Any]] = [
            self._segment((20, 52), (21, 52)),
            self._segment((21, 52), (22, 52)),
        ]
        merged = _merge_bridge_segments(features)
        assert len(merged) == 1
        assert merged[0]["geometry"]["type"] == "MultiLineString"

View File

@ -1,176 +0,0 @@
"""Tests for metro stations and osiedla download paths."""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock, patch
import geopandas as gpd
from python_pkg.geo_data._warsaw import (
get_warsaw_metro_stations,
get_warsaw_osiedla,
)
def _make_relation_element(name: str, *, include_outer: bool = True) -> dict[str, Any]:
"""Create a mock OSM relation element."""
members = []
if include_outer:
members.append(
{
"role": "outer",
"geometry": [
{"lon": 0, "lat": 0},
{"lon": 1, "lat": 0},
{"lon": 1, "lat": 1},
{"lon": 0, "lat": 1},
],
}
)
return {"type": "relation", "tags": {"name": name}, "members": members}
class TestGetWarsawMetroStations:
    """Tests for get_warsaw_metro_stations."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @staticmethod
    def _node(tags: dict[str, str], lat: float) -> dict[str, Any]:
        """Build a node element at lon 21.0 with the given tags and latitude."""
        return {"type": "node", "tags": tags, "lon": 21.0, "lat": lat}

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With the cache file present, the frame from ``read_file`` is returned."""
        self._stub_cache(mock_cache_dir, cached=True)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame
        assert get_warsaw_metro_stations() is cached_frame

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw._overpass_query")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw.sys.stdout")
    def test_downloads_metro(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: station nodes plus a way, an unnamed node and a repeat."""
        self._stub_cache(mock_cache_dir, cached=False)
        elements: list[dict[str, Any]] = [
            # M1 only station
            self._node({"name": "Kabaty"}, 52.1),
            # M2 only station
            self._node({"name": "Bródno"}, 52.3),
            # M1/M2 interchange
            self._node({"name": "Świętokrzyska"}, 52.2),
            # Unknown station
            self._node({"name": "Nowa Stacja"}, 52.4),
            # Element that is not a node
            {"type": "way", "tags": {"name": "Metro Line"}},
            # Node without a name
            self._node({}, 52.0),
            # Repeat of the first station
            self._node({"name": "Kabaty"}, 52.1),
        ]
        mock_query.return_value = {"elements": elements}
        built_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_frame
        assert get_warsaw_metro_stations() is built_frame
class TestGetWarsawOsiedla:
    """Tests for get_warsaw_osiedla."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._warsaw.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With the cache file present, the frame from ``read_file`` is returned."""
        self._stub_cache(mock_cache_dir, cached=True)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame
        assert get_warsaw_osiedla() is cached_frame

    @patch("python_pkg.geo_data._warsaw.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw._overpass_query")
    @patch("python_pkg.geo_data._warsaw.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw.sys.stdout")
    def test_downloads_osiedla(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """Cache miss: one good relation plus a way, unnamed, repeated and empty ones."""
        self._stub_cache(mock_cache_dir, cached=False)
        elements: list[dict[str, Any]] = [
            _make_relation_element("Mokotów"),
            # Element that is not a relation
            {"type": "way", "tags": {"name": "Way Osiedle"}},
            # Relation without a name
            {"type": "relation", "tags": {}, "members": []},
            # Same name as the first relation
            _make_relation_element("Mokotów"),
            # Relation with no members
            _make_relation_element("Empty", include_outer=False),
        ]
        mock_query.return_value = {"elements": elements}
        built_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_frame
        assert get_warsaw_osiedla() is built_frame

View File

@ -1,269 +0,0 @@
"""Tests for python_pkg.geo_data._warsaw_places module."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import geopandas as gpd
from shapely.geometry import LineString
from python_pkg.geo_data._warsaw_places import (
_filter_streets_by_length,
get_warsaw_landmarks,
get_warsaw_streets,
)
class TestGetWarsawStreets:
    """Tests for get_warsaw_streets."""

    @staticmethod
    def _stub_cache(mock_cache_dir: MagicMock, *, cached: bool) -> MagicMock:
        """Make ``CACHE_DIR / x`` give a fake path whose ``exists()`` is ``cached``."""
        cache_file = MagicMock()
        cache_file.exists.return_value = cached
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        return cache_file

    @patch("python_pkg.geo_data._warsaw_places._filter_streets_by_length")
    @patch("python_pkg.geo_data._warsaw_places.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    def test_cached(
        self,
        mock_cache_dir: MagicMock,
        mock_read: MagicMock,
        mock_filter: MagicMock,
    ) -> None:
        """Cache hit: read and filter are both stubbed to return the same frame."""
        self._stub_cache(mock_cache_dir, cached=True)
        frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = frame
        mock_filter.return_value = frame
        assert get_warsaw_streets() is frame

    def test_downloads(self) -> None:
        """Cache miss: one usable street way plus three elements of other shapes."""
        payload = {
            "elements": [
                {
                    "type": "way",
                    "tags": {"name": "Marszałkowska", "highway": "primary"},
                    "geometry": [
                        {"lon": 21.0, "lat": 52.2},
                        {"lon": 21.0, "lat": 52.3},
                    ],
                },
                # Way with a single point
                {
                    "type": "way",
                    "tags": {"name": "Short"},
                    "geometry": [{"lon": 21.0, "lat": 52.2}],
                },
                # Element that is not a way
                {"type": "node", "tags": {"name": "Node"}},
                # Way lacking a geometry key
                {"type": "way", "tags": {"name": "NoGeom"}},
            ]
        }
        with (
            patch("python_pkg.geo_data._warsaw_places.sys.stdout"),
            patch("python_pkg.geo_data._warsaw_places.CACHE_DIR") as cache_dir,
            patch("python_pkg.geo_data._warsaw_places._overpass_query") as overpass,
            patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir"),
            patch(
                "python_pkg.geo_data._warsaw_places.gpd.GeoDataFrame.from_features"
            ) as from_features,
            patch(
                "python_pkg.geo_data._warsaw_places._filter_streets_by_length"
            ) as length_filter,
        ):
            self._stub_cache(cache_dir, cached=False)
            overpass.return_value = payload
            frame = MagicMock(spec=gpd.GeoDataFrame)
            from_features.return_value = frame
            length_filter.return_value = frame
            assert get_warsaw_streets() is frame
class TestFilterStreetsByLength:
    """Tests for _filter_streets_by_length.

    These call the real function on small in-memory frames in EPSG:4326;
    the second argument is the minimum length passed to the filter.
    """

    def test_filters_and_merges(self) -> None:
        """Rows that survive a 500 threshold all report ``length_m >= 500``."""
        gdf = gpd.GeoDataFrame(
            {
                "name": ["Marszałkowska", "Marszałkowska", "Unknown", "Short"],
                "geometry": [
                    LineString([(21.0, 52.2), (21.0, 52.3)]),
                    LineString([(21.0, 52.3), (21.0, 52.4)]),
                    LineString([(21.0, 52.2), (21.0, 52.3)]),
                    LineString([(21.0, 52.2), (21.001, 52.2001)]),
                ],
            },
            crs="EPSG:4326",
        )
        result = _filter_streets_by_length(gdf, 500)
        # Guard against a vacuous pass: the per-row check below is trivially
        # true for an empty frame, and "Marszałkowska" spans 0.2 degrees of
        # latitude, so at least one street is expected to survive.
        assert not result.empty
        # Only streets >= 500m should be included
        assert (result["length_m"] >= 500).all()

    def test_single_segment(self) -> None:
        """A lone named segment with threshold 0 yields exactly one row."""
        gdf = gpd.GeoDataFrame(
            {
                "name": ["Marszałkowska"],
                "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])],
            },
            crs="EPSG:4326",
        )
        result = _filter_streets_by_length(gdf, 0)
        # Only the row count is checked here; the geometry type is not asserted.
        assert len(result) == 1

    def test_unknown_name_excluded(self) -> None:
        """A street named "Unknown" is dropped even with threshold 0."""
        gdf = gpd.GeoDataFrame(
            {
                "name": ["Unknown"],
                "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])],
            },
            crs="EPSG:4326",
        )
        result = _filter_streets_by_length(gdf, 0)
        assert len(result) == 0

    def test_empty_name_excluded(self) -> None:
        """A street with an empty name is dropped even with threshold 0."""
        gdf = gpd.GeoDataFrame(
            {
                "name": [""],
                "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])],
            },
            crs="EPSG:4326",
        )
        result = _filter_streets_by_length(gdf, 0)
        assert len(result) == 0

    def test_no_name_column(self) -> None:
        """A frame without a ``name`` column produces an empty result."""
        gdf = gpd.GeoDataFrame(
            {
                "geometry": [LineString([(21.0, 52.2), (21.0, 52.3)])],
            },
            crs="EPSG:4326",
        )
        result = _filter_streets_by_length(gdf, 0)
        assert len(result) == 0
class TestGetWarsawLandmarks:
    """Tests for get_warsaw_landmarks with the cache and Overpass call patched."""

    @patch("python_pkg.geo_data._warsaw_places.gpd.read_file")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    def test_cached(self, mock_cache_dir: MagicMock, mock_read: MagicMock) -> None:
        """With an existing cache path, the patched read_file result is returned."""
        cache_file = MagicMock()
        cache_file.exists.return_value = True
        # CACHE_DIR / "<name>" resolves to the fake cache file.
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        cached_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_read.return_value = cached_frame

        assert get_warsaw_landmarks() is cached_frame

    @patch("python_pkg.geo_data._warsaw_places.gpd.GeoDataFrame.from_features")
    @patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw_places._overpass_query")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw_places.sys.stdout")
    def test_downloads(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
        mock_from_features: MagicMock,
    ) -> None:
        """With no cache, a mixed Overpass payload ends in the from_features result."""

        def node(tags: dict) -> dict:
            """Build a node element at the fixed test coordinate."""
            return {"type": "node", "tags": tags, "lon": 21.0, "lat": 52.2}

        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)

        overpass_elements = [
            # Node with tourism
            node({"name": "Muzeum Chopina", "tourism": "museum"}),
            # Way with center
            {
                "type": "way",
                "tags": {"name": "Łazienki", "tourism": "attraction"},
                "center": {"lon": 21.0, "lat": 52.2},
            },
            # Node with historic
            node({"name": "Kolumna Zygmunta", "historic": "monument"}),
            # Node with leisure
            node({"name": "Park Skaryszewski", "leisure": "park"}),
            # Node no tourism/historic/leisure -> "landmark"
            node({"name": "Generic"}),
            # Duplicate
            node({"name": "Muzeum Chopina", "tourism": "museum"}),
            # No name
            node({"tourism": "museum"}),
            # Way without center
            {
                "type": "way",
                "tags": {"name": "No Center"},
            },
        ]
        mock_query.return_value = {"elements": overpass_elements}

        built_frame = MagicMock(spec=gpd.GeoDataFrame)
        mock_from_features.return_value = built_frame

        assert get_warsaw_landmarks() is built_frame

    @patch("python_pkg.geo_data._warsaw_places._ensure_cache_dir")
    @patch("python_pkg.geo_data._warsaw_places._overpass_query")
    @patch("python_pkg.geo_data._warsaw_places.CACHE_DIR")
    @patch("python_pkg.geo_data._warsaw_places.sys.stdout")
    def test_empty_result(
        self,
        mock_stdout: MagicMock,
        mock_cache_dir: MagicMock,
        mock_query: MagicMock,
        mock_ensure: MagicMock,
    ) -> None:
        """With no cache and no Overpass elements, the result has length zero."""
        cache_file = MagicMock()
        cache_file.exists.return_value = False
        mock_cache_dir.__truediv__ = MagicMock(return_value=cache_file)
        mock_query.return_value = {"elements": []}

        landmarks = get_warsaw_landmarks()

        assert len(landmarks) == 0

View File

@ -1,163 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.pdf

View File

@ -1 +0,0 @@
3.9.0

View File

@ -1,4 +0,0 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning

View File

@ -1 +0,0 @@
"""Symmetric splitting utilities package."""

View File

@ -1,4 +0,0 @@
#!/usr/bin/env bash
# Run split_x_into_n_symmetrically.py (located next to this script) with the
# interpreter at ../../.venv/bin/python relative to this script, forwarding
# all command-line arguments unchanged.
set -e
# `--` stops option parsing, so a script path that begins with "-" is still
# treated as a path by dirname and cd.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# exec replaces this shell with Python, so signals and the exit status reach
# the caller directly instead of passing through an idle wrapper process.
exec "$SCRIPT_DIR/../../.venv/bin/python" "$SCRIPT_DIR/split_x_into_n_symmetrically.py" "$@"

View File

@ -1,60 +0,0 @@
"""Distribute values symmetrically across N parts."""
def calculate_symmetric_weights(
n: int, middle_weight: float, factors: list[float] | None = None
) -> list[float]:
"""Calculate symmetric weights for both even and odd N.
Args:
n: Number of parts to split into.
middle_weight: The middle value for symmetry.
factors: If provided, controls the difference in weights.
Must have length n // 2 or n // 2 - 1 depending on n.
Returns:
List of symmetric weights.
"""
half_n = n // 2
weights_left: list[float] = [middle_weight]
if factors:
for factor in factors:
next_weight = weights_left[-1] + factor
weights_left.append(next_weight)
else:
weights_left.extend(middle_weight - (idx + 1) for idx in range(half_n - 1))
if not n % 2:
weights = weights_left[::-1] + weights_left
else:
weights = [*weights_left[::-1], middle_weight, *weights_left]
return weights
def scale_to_total(x: float, weights: list[float]) -> list[float]:
    """Rescale ``weights`` proportionally so that they add up to ``x``.

    Args:
        x: Total value to distribute.
        weights: Relative weights; their sum must be non-zero.

    Returns:
        One value per weight, each equal to ``(x / sum(weights)) * weight``.

    Raises:
        ZeroDivisionError: If ``weights`` is empty or sums to zero.
    """
    # Value represented by a single unit of weight.
    per_unit = x / sum(weights)
    scaled: list[float] = []
    for w in weights:
        scaled.append(per_unit * w)
    return scaled
def split_x_into_n_symmetrically(x: float, n: int, factors: list[float]) -> list[float]:
    """Split ``x`` across symmetric weights grown from 1 by ``factors``.

    The weights come from ``calculate_symmetric_weights`` with a middle weight
    of 1 and are then rescaled by ``scale_to_total`` so the parts sum to ``x``.
    """
    return scale_to_total(
        x,
        calculate_symmetric_weights(n, middle_weight=1, factors=factors),
    )
def split_x_into_n_middle(x: float, n: int, middle_value: float) -> list[float]:
    """Split ``x`` across symmetric weights centred on ``middle_value``.

    The weights come from ``calculate_symmetric_weights`` (no factors) and are
    then rescaled by ``scale_to_total`` so the parts sum to ``x``.
    """
    return scale_to_total(
        x,
        calculate_symmetric_weights(n, middle_weight=middle_value),
    )

View File

@ -1 +0,0 @@
"""Tests for split module."""

View File

@ -1,118 +0,0 @@
"""Unit tests for split_x_into_n_symmetrically module."""
import pytest
from python_pkg.split.split_x_into_n_symmetrically import (
calculate_symmetric_weights,
scale_to_total,
split_x_into_n_middle,
split_x_into_n_symmetrically,
)
class TestCalculateSymmetricWeights:
    """Behavioural checks for calculate_symmetric_weights."""

    def test_odd_n_without_factors(self) -> None:
        """Odd n without factors gives n weights that mirror around the centre."""
        result = calculate_symmetric_weights(n=5, middle_weight=3)
        # For n=5 the half size is 2, so five weights come back.
        assert len(result) == 5
        # Reading from either end must give the same values.
        assert result[0] == result[-1]
        assert result[1] == result[-2]

    def test_even_n_without_factors(self) -> None:
        """Even n without factors gives n mirrored weights."""
        result = calculate_symmetric_weights(n=4, middle_weight=2)
        assert len(result) == 4
        # Reading from either end must give the same values.
        assert result[0] == result[-1]
        assert result[1] == result[-2]

    def test_with_factors(self) -> None:
        """With factors, the length follows the factor count rather than n."""
        result = calculate_symmetric_weights(n=4, middle_weight=1, factors=[0.5, 0.3])
        # Two factors grow a three-entry half, which is mirrored: 2 * 3 == 6.
        assert len(result) == 6  # Actual behavior based on factors
        # Reading from either end must give the same values.
        assert result[0] == result[-1]
        assert result[1] == result[-2]

    def test_n_equals_1(self) -> None:
        """n=1 returns three copies of the middle weight (current behaviour)."""
        result = calculate_symmetric_weights(n=1, middle_weight=5)
        # Odd layout with a one-entry half: half reversed + middle + half.
        assert result == [5, 5, 5]

    def test_n_equals_2(self) -> None:
        """n=2 returns two equal weights."""
        result = calculate_symmetric_weights(n=2, middle_weight=3)
        assert len(result) == 2
        assert result[0] == result[1]
class TestScaleToTotal:
    """Behavioural checks for scale_to_total."""

    def test_scale_to_total_basic(self) -> None:
        """The scaled values add up to the requested total."""
        result = scale_to_total(x=100, weights=[1.0, 2.0, 1.0])
        assert sum(result) == pytest.approx(100)

    def test_scale_preserves_proportions(self) -> None:
        """Each scaled value keeps its share of the total."""
        result = scale_to_total(x=60, weights=[1.0, 2.0, 3.0])
        # The weights sum to 6, so one unit of weight is worth 10.
        assert result[0] == pytest.approx(10)
        assert result[1] == pytest.approx(20)
        assert result[2] == pytest.approx(30)

    def test_scale_with_floats(self) -> None:
        """Fractional weights are scaled to the requested total as well."""
        result = scale_to_total(x=10, weights=[0.5, 1.0, 0.5])
        assert sum(result) == pytest.approx(10)
class TestSplitXIntoNSymmetrically:
    """Behavioural checks for split_x_into_n_symmetrically."""

    def test_split_basic(self) -> None:
        """A factor-driven split is symmetric and sums to x."""
        parts = split_x_into_n_symmetrically(x=100, n=4, factors=[0.5, 0.2])
        # The part count follows the number of factors, not just n.
        assert len(parts) == 6  # Actual behavior
        assert sum(parts) == pytest.approx(100)
        # Reading from either end must give the same values.
        assert parts[0] == pytest.approx(parts[-1])
        assert parts[1] == pytest.approx(parts[-2])

    def test_split_preserves_total(self) -> None:
        """The parts always add back up to x."""
        parts = split_x_into_n_symmetrically(x=1000, n=5, factors=[0.1, 0.2])
        assert sum(parts) == pytest.approx(1000)
class TestSplitXIntoNMiddle:
    """Behavioural checks for split_x_into_n_middle."""

    def test_split_middle_basic(self) -> None:
        """Three parts come back and they sum to x."""
        parts = split_x_into_n_middle(x=100, n=3, middle_value=2)
        assert len(parts) == 3
        assert sum(parts) == pytest.approx(100)

    def test_split_middle_symmetric(self) -> None:
        """The parts read the same from either end."""
        parts = split_x_into_n_middle(x=100, n=5, middle_value=3)
        assert parts[0] == pytest.approx(parts[-1])
        assert parts[1] == pytest.approx(parts[-2])

    def test_split_middle_even_parts(self) -> None:
        """An even part count yields that many parts summing to x."""
        parts = split_x_into_n_middle(x=50, n=4, middle_value=1)
        assert len(parts) == 4
        assert sum(parts) == pytest.approx(50)

View File

@ -1,191 +0,0 @@
# ==============================================================================
# Python Development Dependencies - Linting, Formatting, and Testing
# ==============================================================================
# Install with: pip install -r requirements-dev.txt
# ==============================================================================
# Include base requirements
-r requirements.txt
# add-trailing-comma - Add trailing commas
add-trailing-comma>=3.1.0
# autoflake - Remove unused imports and variables
autoflake>=2.2.0
# autopep8 - PEP 8 formatting (alternative formatter)
autopep8>=2.0.0
# ==============================================================================
# SECURITY LINTERS
# ==============================================================================
# Bandit - Security linter
bandit>=1.7.0
# Black - The uncompromising code formatter (fallback/comparison)
black>=24.0.0
# ==============================================================================
# SPELL CHECKING
# ==============================================================================
# codespell - Fix common misspellings
codespell>=2.2.0
# Coverage.py - Code coverage measurement
coverage>=7.4.0
# darglint - Check docstrings match function signatures
darglint>=1.8.0
# dead - Find dead code
dead>=1.5.0
# docformatter - Formats docstrings
docformatter>=1.7.0
# fixit - Auto-fix linting errors
fixit>=2.1.0
# Flake8 - Linting tool (wraps pyflakes, pycodestyle, mccabe)
flake8>=7.0.0
flake8-annotations>=3.0.0 # Type annotation checks
flake8-bandit>=4.1.0 # Security checks via bandit
# Flake8 plugins for maximum coverage
flake8-bugbear>=24.0.0 # Additional bug detection
flake8-comprehensions>=3.14.0 # Better list/dict/set comprehensions
flake8-docstrings>=1.7.0 # Docstring checks
flake8-eradicate>=1.5.0 # Dead code detection
flake8-pie>=0.16.0 # Miscellaneous lints
flake8-print>=5.0.0 # Detect print statements
flake8-pyi>=24.0.0 # Type stub file checks
flake8-pytest-style>=2.0.0 # Pytest style checks
flake8-return>=1.2.0 # Better return statement checks
flake8-simplify>=0.21.0 # Simplification suggestions
# Hypothesis - Property-based testing
hypothesis>=6.98.0
# ==============================================================================
# IMPORT CHECKING
# ==============================================================================
# importlib-metadata for import analysis
importlib-metadata>=7.0.0
# ==============================================================================
# DOCUMENTATION
# ==============================================================================
# pep257 is not listed: it is the legacy PEP 257 docstring checker, superseded by pydocstyle
# interrogate - Check docstring coverage
interrogate>=1.5.0
# isort - Import sorting (ruff handles this, but useful standalone)
isort>=5.13.0
# mccabe - McCabe complexity checker
mccabe>=0.7.0
# ==============================================================================
# TYPE CHECKING
# ==============================================================================
# MyPy - Static type checker
mypy>=1.8.0
# pip-audit - Audit Python packages for known vulnerabilities
pip-audit>=2.6.0
# pipdeptree - Show dependency tree
pipdeptree>=2.14.0
# ==============================================================================
# PRE-COMMIT
# ==============================================================================
# pre-commit - Git hook management
pre-commit>=3.6.0
# prospector - Python static analysis tool
prospector>=1.10.0
# pycodestyle - Python style guide checker (PEP 8)
pycodestyle>=2.11.0
# pydocstyle - Docstring style checker (PEP 257)
pydocstyle>=6.3.0
# pyflakes - Passive checker of Python programs
pyflakes>=3.2.0
# pylama - Code audit tool (wraps multiple linters)
pylama>=8.4.0
# ==============================================================================
# LINTERS
# ==============================================================================
# Pylint - Comprehensive Python linter
pylint>=3.0.0
# Pyright - Microsoft's type checker (very strict)
pyright>=1.1.350
# ==============================================================================
# TESTING
# ==============================================================================
# pytest - Testing framework
pytest>=8.0.0
# pytest plugins
pytest-cov>=4.1.0 # Coverage plugin
pytest-randomly>=3.15.0 # Randomize test order
pytest-sugar>=1.0.0 # Better test output
pytest-timeout>=2.2.0 # Test timeouts
pytest-xdist>=3.5.0 # Parallel test execution
# ==============================================================================
# ADDITIONAL TOOLS
# ==============================================================================
# pyupgrade - Upgrade Python syntax
pyupgrade>=3.15.0
# Radon - Code metrics (complexity, maintainability)
radon>=6.0.0
# reorder-python-imports - Reorder imports
reorder-python-imports>=3.12.0
# ==============================================================================
# CODE FORMATTERS
# ==============================================================================
# Ruff - Extremely fast Python linter and formatter (replaces many tools)
ruff>=0.8.0
# Safety - Check dependencies for known security vulnerabilities
safety>=2.3.0
types-python-dateutil>=2.8.0
types-PyYAML>=6.0.0
# Type stubs for common packages
types-requests>=2.31.0
types-setuptools>=69.0.0
# ==============================================================================
# CODE QUALITY & DEAD CODE DETECTION
# ==============================================================================
# Vulture - Find dead code
vulture>=2.10
# xenon - Monitor code complexity
xenon>=0.9.0
# yapf - Yet Another Python Formatter (Google's formatter)
yapf>=0.40.0

View File

@ -1,21 +0,0 @@
aiohttp>=3.9
beautifulsoup4>=4.0
berserk>=0.13
bottle>=0.12
genanki>=0.13
geopandas>=1.0
howlongtobeatpy>=1.0
lxml>=5.0
# Optional dependencies for specific scripts (needed for full pylint analysis)
matplotlib>=3.0
mitmproxy>=10.0
numpy>=1.20
opencv-python>=4.0
pillow>=10.0
pygame>=2.0
pytest>=7.0
python-chess>=1.999
requests>=2.0
selenium>=4.0
websockets>=13.0

1
requirements.txt Symbolic link
View File

@ -0,0 +1 @@
meta/requirements.txt

148
run.sh
View File

@ -1,148 +0,0 @@
#!/bin/bash
# Easy entrypoint for system usage reports and polling script diagnostics.
# Usage:
# ./run.sh # today's report to stdout
# ./run.sh --date 20260501 # specific day
# ./run.sh --top 25 # override row count
# ./run.sh --profile [duration] # profile polling scripts (default 60s)
# ./run.sh --diagnose # find inefficient shell scripts
# ./run.sh --init-artifacts ... # bootstrap contract/evidence/session artifacts
#
# Any other args are forwarded to usage_report.py unchanged.
set -euo pipefail  # stop on command failure, unset variables and failed pipeline stages

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPORT_SCRIPT="$SCRIPT_DIR/linux_configuration/scripts/system-maintenance/bin/usage_report.py"
ARTIFACT_INIT_SCRIPT="$SCRIPT_DIR/scripts/init_agent_artifacts.sh"

[[ -f "$REPORT_SCRIPT" ]] || {
    # The default mode runs this script, so refuse to continue without it.
    echo "Error: usage_report.py not found at: $REPORT_SCRIPT" >&2
    exit 1
}

[[ -f "$ARTIFACT_INIT_SCRIPT" ]] || {
    # --init-artifacts hands control to this script.
    echo "Error: init_agent_artifacts.sh not found at: $ARTIFACT_INIT_SCRIPT" >&2
    exit 1
}
# Profiling mode: trace fork-heavy scripts over time
profile_polling_scripts() {
    # Attach strace to this shell (PID $$) for up to <duration> and print the
    # clone/execve rows of its summary table.
    #
    # Arguments:
    #   $1  duration handed to timeout(1); defaults to 60.
    # Output:
    #   progress messages on stderr, up to 20 matching summary lines on stdout.
    #
    # NOTE(review): only this script's own process tree is traced, not other
    # polling scripts running on the system - confirm that is intended.
    local duration="${1:-60}"
    local trace_file

    echo "=== Polling Script Profiler (${duration}s) ===" >&2
    echo "Tracing fork/exec calls in shell scripts..." >&2
    echo "" >&2

    # mktemp creates the file atomically under an unpredictable name; a fixed
    # /tmp/polling_trace_<pid>.txt could be pre-created or symlinked by
    # another local user before this script writes to it.
    trace_file="$(mktemp "${TMPDIR:-/tmp}/polling_trace.XXXXXX")"

    # timeout exits non-zero when the time limit is reached and strace may
    # fail to attach; both are tolerated so the analysis below still runs.
    timeout "$duration" strace -f -e trace=clone,execve -c -p $$ > "$trace_file" 2>&1 || true

    echo "Trace completed. Analyzing results:" >&2
    echo "" >&2

    # Show fork/exec heavy processes; finding no matching rows is not an error.
    grep -e "execve" -e "clone" "$trace_file" | head -20 || true

    rm -f "$trace_file"
}
# Diagnostic mode: find inefficient patterns in shell scripts
diagnose_polling_scripts() {
    # Scan every *.sh file under $SCRIPT_DIR for five polling anti-patterns
    # and print a fixed list of recommendations.
    #
    # Output:
    #   section headings and recommendations on stderr; up to five matches
    #   per section on stdout.
    #
    # Each search ends in `|| true`: "no match" (grep exit status 1) and head
    # closing the pipe early are normal outcomes and must not abort the
    # script under `set -euo pipefail`.

    # Common grep options: recurse, skip binary files, look only at *.sh and
    # never descend into the .git directory. (Filtering the output with
    # `grep -v ".git"` also dropped any line that merely contained
    # "<any char>git", such as "digit".)
    local -a sh_files=(-r -I --include="*.sh" --exclude-dir=.git)

    echo "=== Shell Script Efficiency Audit ===" >&2
    echo "" >&2

    # Check for common anti-patterns
    echo "Checking for anti-patterns in shell scripts..." >&2
    echo "" >&2

    # Pattern 1: while true with sleep (no event-driven check)
    echo "1. Polling loops (while true + sleep):" >&2
    grep "${sh_files[@]}" "while true\|while :" "$SCRIPT_DIR" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    # Pattern 2: $(date +...) calls in loops (fork-heavy)
    echo "2. Excessive date calls (each forks a process):" >&2
    grep "${sh_files[@]}" '\$(date' "$SCRIPT_DIR" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    # Pattern 3: pgrep/xdotool in loops
    echo "3. Process inspection in loops (pgrep, xdotool):" >&2
    grep "${sh_files[@]}" "while.*pgrep\|while.*xdotool\|pgrep.*while" "$SCRIPT_DIR" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    # Pattern 4: pipes in hot paths
    echo "4. Heavy pipes in polling scripts (| awk, | grep, | tr):" >&2
    # Stage one lists the *files* that contain "while true" (-l), separated
    # by NUL bytes (-Z) so any file name is passed on intact; stage two keeps
    # those that also pipe into awk/grep/tr. Without -l the first grep emits
    # "file:matching line" text, which is not usable as a list of file names.
    grep "${sh_files[@]}" -l -Z "while true" "$SCRIPT_DIR" 2>/dev/null \
        | xargs -0 -r grep -l -e " | awk" -e " | grep" -e " | tr" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    # Pattern 5: sleep with very short intervals
    echo "5. Aggressive polling (sleep < 1s):" >&2
    grep "${sh_files[@]}" -E "sleep 0\.[0-9]|sleep 0[^0-9]" "$SCRIPT_DIR" 2>/dev/null \
        | head -5 || true
    echo "" >&2

    echo "=== Recommendations ===" >&2
    echo "1. Replace 'while true + sleep' with event-driven I/O (inotifywait, read -t, etc.)" >&2
    echo "2. Use /proc and /sys instead of forking date, sensors, acpi, etc." >&2
    echo "3. Cache frequently accessed values (e.g., in /tmp state files)" >&2
    echo "4. Use bash builtins: printf %()T instead of date, \${var//} instead of tr, etc." >&2
    echo "5. Use i3blocks interval=persist + event loop instead of polling mode" >&2
    echo "6. Increase polling intervals: 1s → 5s → 10s where acceptable" >&2
}
# Handle special modes
case "${1:-}" in
    --profile)
        # Optional second argument: trace duration (defaults to 60).
        profile_polling_scripts "${2:-60}"
        exit 0
        ;;
    --diagnose)
        diagnose_polling_scripts
        exit 0
        ;;
    --init-artifacts)
        # Drop the mode flag and hand the remaining arguments to the helper.
        shift
        exec "$ARTIFACT_INIT_SCRIPT" "$@"
        ;;
    --help)
        # Print only the header comment block at the top of this file: skip
        # the shebang, strip the leading "# " from each comment line and stop
        # at the first line that is not a comment. Matching every "# " line
        # in the file would also print implementation comments and repeat the
        # usage line.
        awk 'NR == 1 && /^#!/ { next }
             /^#/ { sub(/^# ?/, ""); print; next }
             { exit }' "$0"
        exit 0
        ;;
esac
# Default: run usage_report.py with all remaining args
exec python3 "$REPORT_SCRIPT" "$@"

1
run.sh Symbolic link
View File

@ -0,0 +1 @@
meta/run.sh

View File

@ -1,40 +0,0 @@
#!/usr/bin/env bash
# Check that every directory containing C/C++ source files has a Makefile and run.sh.
# Used as a pre-commit hook; receives staged file paths as arguments.
# Exits 1 and lists every missing file when a check fails, otherwise exits 0.
set -uo pipefail

errors=()
declare -A checked_dirs

# Matches a path component named exactly "build" anywhere in a directory path.
# Kept in a variable so [[ =~ ]] treats it as a regex rather than a literal.
build_dir_re='(^|/)build(/|$)'

for file in "$@"; do
    dir=$(dirname -- "$file")

    # Skip build directories and CMake artefact trees (builtin regex match,
    # so no grep process is forked per staged file).
    if [[ "$dir" =~ $build_dir_re ]]; then
        continue
    fi

    # Skip if already checked this directory. A plain expansion is used
    # instead of [[ -v checked_dirs[...] ]]: -v expands the subscript a second
    # time, which misbehaves for directory names containing characters such
    # as "$" or "]".
    [[ -n "${checked_dirs[$dir]:-}" ]] && continue
    checked_dirs["$dir"]=1

    # Check for Makefile (either capitalisation: Makefile or makefile).
    # Explicit -f tests avoid interpreting glob characters in "$dir".
    if [[ ! -f "$dir/Makefile" && ! -f "$dir/makefile" ]]; then
        errors+=("MISSING Makefile in: $dir")
    fi

    # Check for run.sh
    if [[ ! -f "$dir/run.sh" ]]; then
        errors+=("MISSING run.sh in: $dir")
    fi
done

if [[ ${#errors[@]} -gt 0 ]]; then
    printf 'C/C++ build file check failed:\n'
    printf ' %s\n' "${errors[@]}"
    printf '\nEvery directory with .c/.cpp files must have a Makefile and run.sh.\n'
    exit 1
fi

exit 0

View File

@ -1,33 +0,0 @@
#!/usr/bin/env bash
# Post-clone setup script for testsAndMisc repository.
# Run once after cloning: ./setup.sh
set -euo pipefail

# Work from the top of the working tree no matter where the script is started.
top_level="$(git rev-parse --show-toplevel)"
cd "$top_level"

printf 'Configuring git hooks path...\n'
git config core.hooksPath linux_configuration/.githooks
printf ' ✓ core.hooksPath set to linux_configuration/.githooks\n'

# Collect the C/C++ and shell lint tools (used by pre-commit hooks) that are
# not on PATH.
missing_tools=()
for tool in clang-format cppcheck flawfinder shellcheck node npx; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        missing_tools+=("$tool")
    fi
done

if [[ ${#missing_tools[@]} -eq 0 ]]; then
    printf ' ✓ All lint tools available\n'
else
    printf '\n⚠ Missing tools for pre-commit hooks: %s\n' "${missing_tools[*]}"
    # NOTE(review): the hints below print command names, which may not match
    # the distribution's package names - verify before copy-pasting.
    if command -v pacman >/dev/null 2>&1; then
        printf ' Install with: sudo pacman -S --needed %s\n' "${missing_tools[*]}"
    elif command -v apt-get >/dev/null 2>&1; then
        printf ' Install with: sudo apt-get install %s\n' "${missing_tools[*]}"
    else
        printf ' Please install: %s\n' "${missing_tools[*]}"
    fi
fi

printf '\nSetup complete.\n'