From 59ccf65c6dc59120bc401abd77fe77d69a98208c Mon Sep 17 00:00:00 2001 From: Krzysztof kuhy Rudnicki Date: Thu, 8 Jan 2026 17:01:54 +0100 Subject: [PATCH] feat: comprehensive Poland geography cards --- .gitignore | 12 + python_pkg/geo_data.py | 1056 +++++++++++++++++ python_pkg/polish_gminy/__init__.py | 1 + python_pkg/polish_gminy/polish_gminy_anki.py | 404 +++++++ python_pkg/polish_gminy/run.sh | 36 + python_pkg/polish_powiaty/__init__.py | 1 + .../polish_powiaty/polish_powiaty_anki.py | 310 +++++ python_pkg/polish_powiaty/run.sh | 34 + python_pkg/preview_all.html | 82 ++ python_pkg/warsaw_bridges/__init__.py | 1 + python_pkg/warsaw_bridges/run.sh | 34 + .../warsaw_bridges/warsaw_bridges_anki.py | 318 +++++ python_pkg/warsaw_landmarks/__init__.py | 1 + python_pkg/warsaw_landmarks/run.sh | 34 + .../warsaw_landmarks/warsaw_landmarks_anki.py | 272 +++++ python_pkg/warsaw_metro/__init__.py | 1 + python_pkg/warsaw_metro/run.sh | 34 + python_pkg/warsaw_metro/warsaw_metro_anki.py | 295 +++++ python_pkg/warsaw_osiedla/__init__.py | 1 + python_pkg/warsaw_osiedla/run.sh | 34 + .../warsaw_osiedla/warsaw_osiedla_anki.py | 327 +++++ python_pkg/warsaw_streets/README.md | 37 + python_pkg/warsaw_streets/__init__.py | 1 + python_pkg/warsaw_streets/run.sh | 34 + .../warsaw_streets/warsaw_streets_anki.py | 356 ++++++ 25 files changed, 3716 insertions(+) create mode 100644 python_pkg/geo_data.py create mode 100644 python_pkg/polish_gminy/__init__.py create mode 100755 python_pkg/polish_gminy/polish_gminy_anki.py create mode 100755 python_pkg/polish_gminy/run.sh create mode 100644 python_pkg/polish_powiaty/__init__.py create mode 100755 python_pkg/polish_powiaty/polish_powiaty_anki.py create mode 100755 python_pkg/polish_powiaty/run.sh create mode 100644 python_pkg/preview_all.html create mode 100644 python_pkg/warsaw_bridges/__init__.py create mode 100755 python_pkg/warsaw_bridges/run.sh create mode 100755 python_pkg/warsaw_bridges/warsaw_bridges_anki.py create mode 100644 
python_pkg/warsaw_landmarks/__init__.py create mode 100755 python_pkg/warsaw_landmarks/run.sh create mode 100755 python_pkg/warsaw_landmarks/warsaw_landmarks_anki.py create mode 100644 python_pkg/warsaw_metro/__init__.py create mode 100755 python_pkg/warsaw_metro/run.sh create mode 100755 python_pkg/warsaw_metro/warsaw_metro_anki.py create mode 100644 python_pkg/warsaw_osiedla/__init__.py create mode 100755 python_pkg/warsaw_osiedla/run.sh create mode 100755 python_pkg/warsaw_osiedla/warsaw_osiedla_anki.py create mode 100644 python_pkg/warsaw_streets/README.md create mode 100644 python_pkg/warsaw_streets/__init__.py create mode 100755 python_pkg/warsaw_streets/run.sh create mode 100755 python_pkg/warsaw_streets/warsaw_streets_anki.py diff --git a/.gitignore b/.gitignore index 04dd9ef..1933b9a 100644 --- a/.gitignore +++ b/.gitignore @@ -258,3 +258,15 @@ python_pkg/music_gen/output/ python_pkg/screen_locker/sick_day_state.json python_pkg/screen_locker/workout_log.json.bak preview_images +# Anki generated packages (large binary files) +*.apkg + +# Geographic data cache (can be regenerated) +python_pkg/geo_cache/ + +# Local venvs in subpackages +python_pkg/*/.venv/ +python_pkg/*/cache/ + +# Large geojson files that can be downloaded +python_pkg/warsaw_districts/warszawa-dzielnice.geojson diff --git a/python_pkg/geo_data.py b/python_pkg/geo_data.py new file mode 100644 index 0000000..07cdae0 --- /dev/null +++ b/python_pkg/geo_data.py @@ -0,0 +1,1056 @@ +"""Shared geographic data module for Warsaw and Poland Anki generators. + +This module handles downloading and caching geographic data from various sources: +- OpenStreetMap via Overpass API +- Geofabrik OSM extracts +- GitHub repositories with pre-processed GeoJSON + +All data is cached locally to avoid repeated downloads. 
+""" + +from __future__ import annotations + +import contextlib +import json +from pathlib import Path +import shutil +import sys +import time +from typing import TYPE_CHECKING +from urllib.request import urlopen + +import geopandas as gpd +import requests +from shapely.geometry import LineString, MultiLineString + +if TYPE_CHECKING: + from typing import Any + +# Shared cache directory for all geo data +CACHE_DIR = Path(__file__).parent / "geo_cache" + +# Overpass API endpoints (multiple for redundancy) +# Note: kumi.systems is more reliable, so it's first +OVERPASS_ENDPOINTS = [ + "https://overpass.kumi.systems/api/interpreter", + "https://overpass-api.de/api/interpreter", + "https://maps.mail.ru/osm/tools/overpass/api/interpreter", +] + +# GitHub URLs for pre-processed data +POLSKA_GEOJSON_BASE = "https://raw.githubusercontent.com/ppatrzyk/polska-geojson/master" + +# Wikidata SPARQL endpoint +WIKIDATA_SPARQL = "https://query.wikidata.org/sparql" + +# Request timeout and retry settings +REQUEST_TIMEOUT = 180 +MAX_RETRIES = 3 +RETRY_DELAY = 5 + + +def _ensure_cache_dir() -> None: + """Create cache directory if it doesn't exist.""" + CACHE_DIR.mkdir(parents=True, exist_ok=True) + + +def _query_wikidata(query: str) -> list[dict[str, Any]]: + """Query Wikidata SPARQL endpoint. + + Args: + query: SPARQL query string. + + Returns: + List of result bindings. + """ + response = requests.get( + WIKIDATA_SPARQL, + params={"query": query, "format": "json"}, + timeout=60, + ) + response.raise_for_status() + return response.json()["results"]["bindings"] + + +def _get_powiaty_population() -> dict[str, int]: + """Get population data for all Polish powiaty from Wikidata. + + Returns: + Dictionary mapping powiat name to population. 
+ """ + cache_path = CACHE_DIR / "powiaty_population.json" + + if cache_path.exists(): + return json.loads(cache_path.read_text()) + + # Query Wikidata for all powiaty (Q247073) in Poland (Q36) with population + # Filter to only current Polish powiaty using country=Poland filter + query = """ + SELECT ?powiat ?powiatLabel ?population WHERE { + ?powiat wdt:P31 wd:Q247073. + ?powiat wdt:P17 wd:Q36. + ?powiat wdt:P1082 ?population. + SERVICE wikibase:label { bd:serviceParam wikibase:language "pl,en". } + } + ORDER BY DESC(?population) + """ + + sys.stdout.write("Fetching powiaty population data from Wikidata...\n") + results = _query_wikidata(query) + + population_map: dict[str, int] = {} + for item in results: + label = item.get("powiatLabel", {}).get("value", "") + pop = item.get("population", {}).get("value", "0") + if label and pop: + # Remove "powiat" prefix if present for matching + clean_label = label.replace("powiat ", "").strip() + with contextlib.suppress(ValueError): + population_map[clean_label] = int(pop) + + _ensure_cache_dir() + cache_path.write_text(json.dumps(population_map, ensure_ascii=False, indent=2)) + + sys.stdout.write(f"Cached population data for {len(population_map)} powiaty.\n") + return population_map + + +def _try_single_request( + endpoint: str, query: str +) -> tuple[dict[str, Any] | None, Exception | None]: + """Try a single request to an endpoint. + + Args: + endpoint: Overpass API endpoint URL. + query: Overpass QL query string. + + Returns: + Tuple of (result, error). One will be None. 
+ """ + try: + sys.stdout.write(f" Querying {endpoint}...\n") + response = requests.post( + endpoint, + data={"data": query}, + timeout=REQUEST_TIMEOUT, + ) + response.raise_for_status() + result = response.json() + except (requests.RequestException, requests.Timeout, ValueError) as e: + return None, e + else: + # Check for valid response with elements + if not isinstance(result, dict) or "elements" not in result: + return None, ValueError("Invalid response format") + return result, None + + +def _overpass_query(query: str) -> dict[str, Any]: + """Execute an Overpass API query with retry logic. + + Args: + query: Overpass QL query string. + + Returns: + JSON response from the API. + + Raises: + RuntimeError: If all endpoints fail. + """ + last_error: Exception | None = None + + for endpoint in OVERPASS_ENDPOINTS: + for attempt in range(MAX_RETRIES): + result, error = _try_single_request(endpoint, query) + if result is not None: + return result + last_error = error + sys.stdout.write(f" Attempt {attempt + 1} failed: {error}\n") + if attempt < MAX_RETRIES - 1: + time.sleep(RETRY_DELAY) + + msg = f"All Overpass API endpoints failed. Last error: {last_error}" + raise RuntimeError(msg) + + +def _download_github_geojson(url: str, cache_path: Path) -> gpd.GeoDataFrame: + """Download GeoJSON from GitHub and cache it. + + Args: + url: URL to download from. + cache_path: Path to cache the data. + + Returns: + GeoDataFrame with the data. 
+ """ + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write(f"Downloading from {url}...\n") + with urlopen(url, timeout=REQUEST_TIMEOUT) as response: # noqa: S310 + data = json.loads(response.read().decode()) + + _ensure_cache_dir() + cache_path.write_text(json.dumps(data)) + + return gpd.GeoDataFrame.from_features(data["features"], crs="EPSG:4326") + + +# ============================================================================= +# Warsaw Data +# ============================================================================= + + +def get_warsaw_boundary() -> gpd.GeoDataFrame: + """Get Warsaw city boundary. + + Returns: + GeoDataFrame with Warsaw boundary polygon. + """ + cache_path = CACHE_DIR / "warsaw_boundary.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + # Try to use districts file first + districts_path = ( + Path(__file__).parent / "warsaw_districts" / "warszawa-dzielnice.geojson" + ) + if districts_path.exists(): + warsaw_gdf = gpd.read_file(districts_path) + warsaw_boundary = warsaw_gdf[warsaw_gdf["name"] == "Warszawa"] + if len(warsaw_boundary) == 0: + warsaw_boundary = gpd.GeoDataFrame( + geometry=[warsaw_gdf.union_all()], crs=warsaw_gdf.crs + ) + _ensure_cache_dir() + warsaw_boundary.to_file(cache_path, driver="GeoJSON") + return warsaw_boundary + + # Fallback to Overpass query + sys.stdout.write("Fetching Warsaw boundary from OpenStreetMap...\n") + query = """ + [out:json][timeout:60]; + relation["name"="Warszawa"]["admin_level"="6"]; + out geom; + """ + + data = _overpass_query(query) + + features = [] + for element in data.get("elements", []): + if element.get("type") == "relation": + coords = [] + for member in element.get("members", []): + if member.get("role") == "outer" and "geometry" in member: + coords.extend([(p["lon"], p["lat"]) for p in member["geometry"]]) + if coords: + features.append( + { + "type": "Feature", + "properties": {"name": "Warszawa"}, + "geometry": {"type": "Polygon", 
"coordinates": [coords]}, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def get_warsaw_districts() -> gpd.GeoDataFrame: + """Get Warsaw districts (dzielnice). + + Returns: + GeoDataFrame with district boundaries. + """ + districts_path = ( + Path(__file__).parent / "warsaw_districts" / "warszawa-dzielnice.geojson" + ) + if districts_path.exists(): + gdf = gpd.read_file(districts_path) + return gdf[gdf["name"] != "Warszawa"].copy() + + msg = "Warsaw districts GeoJSON not found" + raise FileNotFoundError(msg) + + +def get_vistula_river() -> gpd.GeoDataFrame: + """Get Vistula river in Warsaw. + + Returns: + GeoDataFrame with river geometry. + """ + cache_path = CACHE_DIR / "warsaw_vistula.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write("Fetching Vistula river data...\n") + query = """ + [out:json][timeout:60]; + area["name"="Warszawa"]["admin_level"="6"]->.warsaw; + ( + way["waterway"="river"]["name"="Wisła"](area.warsaw); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + min_coords = 2 + for element in data.get("elements", []): + if element.get("type") == "way" and "geometry" in element: + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) >= min_coords: + features.append( + { + "type": "Feature", + "properties": {"name": "Wisła"}, + "geometry": {"type": "LineString", "coordinates": coords}, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def get_warsaw_bridges() -> gpd.GeoDataFrame: + """Get Warsaw bridges over the Vistula. + + Returns: + GeoDataFrame with bridge geometries. 
+ """ + cache_path = CACHE_DIR / "warsaw_bridges.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write("Fetching Warsaw bridges data...\n") + + # First get the Vistula to filter bridges + vistula = get_vistula_river() + vistula_union = vistula.union_all() + vistula_buffer = vistula_union.buffer(0.002) # ~200m buffer + + # Query for bridges with "Most" in name - smaller query + query = """ + [out:json][timeout:90]; + area["name"="Warszawa"]["admin_level"="6"]->.warsaw; + way["bridge"="yes"]["name"~"^Most"](area.warsaw); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_coords = 2 + + for element in data.get("elements", []): + if element.get("type") != "way" or "geometry" not in element: + continue + + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) < min_coords: + continue + + line = LineString(coords) + + # Check if bridge crosses/is near Vistula + if line.intersects(vistula_buffer): + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name, "osm_id": element.get("id")}, + "geometry": {"type": "LineString", "coordinates": coords}, + } + ) + + # Merge segments of the same bridge + merged_features = _merge_bridge_segments(features) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": merged_features} + cache_path.write_text(json.dumps(geojson)) + + sys.stdout.write(f"Cached {len(merged_features)} bridges.\n") + return gpd.GeoDataFrame.from_features(merged_features, crs="EPSG:4326") + + +def _merge_bridge_segments(features: list[dict]) -> list[dict]: + """Merge bridge segments with the same name. + + Args: + features: List of GeoJSON features. + + Returns: + List of merged features. 
+ """ + by_name: dict[str, list[list[tuple[float, float]]]] = {} + + for feature in features: + name = feature["properties"]["name"] + coords = feature["geometry"]["coordinates"] + if name not in by_name: + by_name[name] = [] + by_name[name].append(coords) + + merged = [] + for name, coord_lists in by_name.items(): + if len(coord_lists) == 1: + geom = {"type": "LineString", "coordinates": coord_lists[0]} + else: + geom = {"type": "MultiLineString", "coordinates": coord_lists} + + merged.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geom} + ) + + return merged + + +def get_warsaw_metro_stations() -> gpd.GeoDataFrame: + """Get Warsaw metro stations with line information. + + Returns: + GeoDataFrame with station points and line info (M1, M2, or M1/M2). + """ + cache_path = CACHE_DIR / "warsaw_metro.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + # Known stations for each line (as of 2024) + m1_stations = { + "Kabaty", + "Natolin", + "Imielin", + "Stokłosy", + "Ursynów", + "Służew", + "Wilanowska", + "Wierzbno", + "Racławicka", + "Pole Mokotowskie", + "Politechnika", + "Centrum", + "Świętokrzyska", # Also M2 + "Ratusz-Arsenał", + "Dworzec Gdański", + "Plac Wilsona", + "Marymont", + "Słodowiec", + "Stare Bielany", + "Wawrzyszew", + "Młociny", + } + m2_stations = { + "Bródno", + "Kondratowicza", + "Zacisze", + "Targówek Mieszkaniowy", + "Trocka", + "Szwedzka", + "Dworzec Wileński", + "Świętokrzyska", # Also M1 + "Nowy Świat-Uniwersytet", + "Centrum Nauki Kopernik", + "Stadion Narodowy", + "Rondo ONZ", + "Rondo Daszyńskiego", + "Płocka", + "Młynów", + "Księcia Janusza", + "Ulrychów", + "Bemowo", + } + + sys.stdout.write("Fetching metro station data...\n") + query = """ + [out:json][timeout:60]; + area["name"="Warszawa"]["admin_level"="6"]->.warsaw; + ( + node["railway"="station"]["station"="subway"](area.warsaw); + node["railway"="station"]["network"~"Metro"](area.warsaw); + ); + out body; + """ + + data = 
_overpass_query(query) + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + if element.get("type") == "node": + name = element.get("tags", {}).get("name", "") + if name and name not in seen_names: + seen_names.add(name) + # Determine line from known station lists + in_m1 = name in m1_stations + in_m2 = name in m2_stations + if in_m1 and in_m2: + line = "M1/M2" + elif in_m1: + line = "M1" + elif in_m2: + line = "M2" + else: + line = "?" # Unknown station + + features.append( + { + "type": "Feature", + "properties": { + "name": name, + "line": line, + }, + "geometry": { + "type": "Point", + "coordinates": [element["lon"], element["lat"]], + }, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + sys.stdout.write(f"Cached {len(features)} metro stations.\n") + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def get_warsaw_streets(min_length: int = 500) -> gpd.GeoDataFrame: + """Get major Warsaw streets. + + Args: + min_length: Minimum street length in meters. + + Returns: + GeoDataFrame with street geometries. 
+ """ + cache_path = CACHE_DIR / "warsaw_streets.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + # Filter by length if needed + return _filter_streets_by_length(gdf, min_length) + + sys.stdout.write("Fetching street data from OpenStreetMap...\n") + query = """ + [out:json][timeout:120]; + area["name"="Warszawa"]["admin_level"="6"]->.warsaw; + ( + way["highway"="primary"]["name"](area.warsaw); + way["highway"="secondary"]["name"](area.warsaw); + way["highway"="tertiary"]["name"](area.warsaw); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + min_coords = 2 + + for element in data.get("elements", []): + if element.get("type") == "way" and "geometry" in element: + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) >= min_coords: + features.append( + { + "type": "Feature", + "properties": { + "name": element.get("tags", {}).get("name", "Unknown"), + "highway": element.get("tags", {}).get("highway", ""), + }, + "geometry": {"type": "LineString", "coordinates": coords}, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + sys.stdout.write(f"Cached {len(features)} street segments.\n") + + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + return _filter_streets_by_length(gdf, min_length) + + +def _filter_streets_by_length( + gdf: gpd.GeoDataFrame, min_length: int +) -> gpd.GeoDataFrame: + """Filter and merge streets by name, keeping only those above min_length. + + Args: + gdf: GeoDataFrame with street segments. + min_length: Minimum length in meters. + + Returns: + GeoDataFrame with merged streets, sorted by length (longest first). 
+ """ + # Group by street name + streets: dict[str, list] = {} + for _, row in gdf.iterrows(): + name = row.get("name", "Unknown") + if name and name != "Unknown": + if name not in streets: + streets[name] = [] + streets[name].append(row.geometry) + + # Merge and filter + result_rows = [] + for name, geometries in streets.items(): + merged = geometries[0] if len(geometries) == 1 else MultiLineString(geometries) + + # Create temp GeoDataFrame for length calculation + temp_gdf = gpd.GeoDataFrame(geometry=[merged], crs="EPSG:4326") + temp_proj = temp_gdf.to_crs("EPSG:2180") # Polish coordinate system + length = temp_proj.geometry.length.iloc[0] + + if length >= min_length: + result_rows.append({"name": name, "geometry": merged, "length_m": length}) + + # Sort by length (longest first) + result_rows.sort(key=lambda x: x["length_m"], reverse=True) + + return gpd.GeoDataFrame(result_rows, crs="EPSG:4326") + + +def get_warsaw_landmarks() -> gpd.GeoDataFrame: + """Get Warsaw landmarks (museums, monuments, parks, etc.). + + Returns: + GeoDataFrame with landmark points. 
+ """ + cache_path = CACHE_DIR / "warsaw_landmarks.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write("Fetching landmark data...\n") + # Simplified query - just museums and major attractions + query = """ + [out:json][timeout:60]; + area["name"="Warszawa"]["admin_level"="6"]->.warsaw; + ( + node["tourism"="museum"]["name"](area.warsaw); + node["tourism"="attraction"]["name"](area.warsaw); + node["historic"="monument"]["name"](area.warsaw); + way["tourism"="museum"]["name"](area.warsaw); + way["tourism"="attraction"]["name"](area.warsaw); + ); + out center; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + # Get coordinates + if element.get("type") == "node": + lon, lat = element["lon"], element["lat"] + elif "center" in element: + lon, lat = element["center"]["lon"], element["center"]["lat"] + else: + continue + + seen_names.add(name) + landmark_type = ( + element.get("tags", {}).get("tourism") + or element.get("tags", {}).get("historic") + or element.get("tags", {}).get("leisure") + or "landmark" + ) + + features.append( + { + "type": "Feature", + "properties": {"name": name, "type": landmark_type}, + "geometry": {"type": "Point", "coordinates": [lon, lat]}, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + sys.stdout.write(f"Cached {len(features)} landmarks.\n") + + if not features: + return gpd.GeoDataFrame( + {"name": [], "type": [], "geometry": []}, crs="EPSG:4326" + ) + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def _extract_osiedla_rings( + element: dict[str, Any], min_coords: int +) -> tuple[list[list[tuple[float, float]]], list[list[tuple[float, float]]]]: + """Extract outer and inner rings from an OSM relation. 
+ + Args: + element: OSM relation element. + min_coords: Minimum number of coordinates for a valid ring. + + Returns: + Tuple of (outer_rings, inner_rings). + """ + outer_rings: list[list[tuple[float, float]]] = [] + inner_rings: list[list[tuple[float, float]]] = [] + + for member in element.get("members", []): + if "geometry" not in member: + continue + ring = [(p["lon"], p["lat"]) for p in member["geometry"]] + if len(ring) < min_coords: + continue + # Close the ring if not closed + if ring[0] != ring[-1]: + ring.append(ring[0]) + if member.get("role") == "outer": + outer_rings.append(ring) + elif member.get("role") == "inner": + inner_rings.append(ring) + + return outer_rings, inner_rings + + +def _build_osiedla_geometry( + outer_rings: list[list[tuple[float, float]]], + inner_rings: list[list[tuple[float, float]]], +) -> dict[str, Any]: + """Build GeoJSON geometry from outer and inner rings. + + Args: + outer_rings: List of outer ring coordinates. + inner_rings: List of inner ring coordinates. + + Returns: + GeoJSON geometry dict. + """ + if len(outer_rings) == 1: + return { + "type": "Polygon", + "coordinates": [outer_rings[0], *inner_rings], + } + # Multiple outer rings - create MultiPolygon + # Each polygon in a MultiPolygon is [exterior, hole1, hole2, ...] + return { + "type": "MultiPolygon", + "coordinates": [[ring] for ring in outer_rings], + } + + +def get_warsaw_osiedla() -> gpd.GeoDataFrame: + """Get Warsaw osiedla (neighborhoods). + + Returns: + GeoDataFrame with osiedla boundaries. 
+ """ + cache_path = CACHE_DIR / "warsaw_osiedla.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write("Fetching osiedla data...\n") + query = """ + [out:json][timeout:180]; + area["name"="Warszawa"]["admin_level"="6"]->.warsaw; + relation["boundary"="administrative"]["admin_level"="11"]["name"](area.warsaw); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_ring_coords = 4 + + for element in data.get("elements", []): + if element.get("type") != "relation": + continue + + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) + if not outer_rings: + continue + + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name}, + "geometry": _build_osiedla_geometry(outer_rings, inner_rings), + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + sys.stdout.write(f"Cached {len(features)} osiedla.\n") + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +# ============================================================================= +# Poland Data +# ============================================================================= + + +def get_polish_wojewodztwa() -> gpd.GeoDataFrame: + """Get Polish województwa (voivodeships). + + Returns: + GeoDataFrame with województwa boundaries. + """ + url = f"{POLSKA_GEOJSON_BASE}/wojewodztwa/wojewodztwa-min.geojson" + cache_path = CACHE_DIR / "polish_wojewodztwa.geojson" + return _download_github_geojson(url, cache_path) + + +def get_polish_powiaty() -> gpd.GeoDataFrame: + """Get Polish powiaty (counties), sorted by population descending. + + Returns: + GeoDataFrame with powiat boundaries and population. 
+ """ + url = f"{POLSKA_GEOJSON_BASE}/powiaty/powiaty-min.geojson" + cache_path = CACHE_DIR / "polish_powiaty.geojson" + gdf = _download_github_geojson(url, cache_path) + + # Get population data from Wikidata + population_map = _get_powiaty_population() + + # Add population column + def get_population(nazwa: str) -> int: + """Match powiat name to population data.""" + if not nazwa: + return 0 + # Remove "powiat " prefix for matching + clean_name = nazwa.replace("powiat ", "").strip() + # Try direct match + if clean_name in population_map: + return population_map[clean_name] + # Try lowercase + name_lower = clean_name.lower() + for pop_name, pop in population_map.items(): + if pop_name.lower() == name_lower: + return pop + return 0 + + gdf["population"] = gdf["nazwa"].apply(get_population) + + # Sort by population descending + return gdf.sort_values("population", ascending=False).reset_index(drop=True) + + +def get_polish_gminy() -> gpd.GeoDataFrame: + """Get Polish gminy (municipalities) from OSM. + + Returns: + GeoDataFrame with gminy boundaries. 
+ """ + cache_path = CACHE_DIR / "polish_gminy.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write("Fetching gminy data from OSM (this may take a while)...\n") + # Polish gminy are admin_level=7 in OSM + query = """ + [out:json][timeout:300]; + area["ISO3166-1"="PL"]->.pl; + relation["boundary"="administrative"]["admin_level"="7"]["name"](area.pl); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_ring_coords = 4 + + for element in data.get("elements", []): + if element.get("type") != "relation": + continue + + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) + if not outer_rings: + continue + + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name}, + "geometry": _build_osiedla_geometry(outer_rings, inner_rings), + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson)) + + sys.stdout.write(f"Cached {len(features)} gminy.\n") + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def get_poland_boundary() -> gpd.GeoDataFrame: + """Get Poland country boundary. + + Returns: + GeoDataFrame with Poland boundary. 
+ """ + cache_path = CACHE_DIR / "poland_boundary.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + # Dissolve from województwa + woj = get_polish_wojewodztwa() + # Fix invalid geometries with buffer(0) + woj["geometry"] = woj["geometry"].buffer(0) + poland = gpd.GeoDataFrame(geometry=[woj.union_all()], crs=woj.crs) + + _ensure_cache_dir() + poland.to_file(cache_path, driver="GeoJSON") + + return poland + + +# ============================================================================= +# Utility Functions +# ============================================================================= + + +def download_all_warsaw_data() -> None: + """Download and cache all Warsaw geographic data. + + Call this once to pre-populate the cache. + """ + sys.stdout.write("Downloading all Warsaw geographic data...\n") + sys.stdout.write("=" * 60 + "\n") + + sys.stdout.write("\n1. Warsaw boundary...\n") + get_warsaw_boundary() + + sys.stdout.write("\n2. Vistula river...\n") + get_vistula_river() + + sys.stdout.write("\n3. Warsaw bridges...\n") + get_warsaw_bridges() + + sys.stdout.write("\n4. Metro stations...\n") + get_warsaw_metro_stations() + + sys.stdout.write("\n5. Major streets...\n") + get_warsaw_streets() + + sys.stdout.write("\n6. Landmarks...\n") + get_warsaw_landmarks() + + sys.stdout.write("\n7. Osiedla...\n") + get_warsaw_osiedla() + + sys.stdout.write("\n" + "=" * 60 + "\n") + sys.stdout.write("All Warsaw data cached successfully!\n") + + +def download_all_poland_data() -> None: + """Download and cache all Poland geographic data. + + Call this once to pre-populate the cache. + """ + sys.stdout.write("Downloading all Poland geographic data...\n") + sys.stdout.write("=" * 60 + "\n") + + sys.stdout.write("\n1. Województwa...\n") + get_polish_wojewodztwa() + + sys.stdout.write("\n2. Powiaty...\n") + get_polish_powiaty() + + sys.stdout.write("\n3. Gminy (this may take a while)...\n") + get_polish_gminy() + + sys.stdout.write("\n4. 
Poland boundary...\n") + get_poland_boundary() + + sys.stdout.write("\n" + "=" * 60 + "\n") + sys.stdout.write("All Poland data cached successfully!\n") + + +def clear_cache() -> None: + """Clear all cached data.""" + if CACHE_DIR.exists(): + shutil.rmtree(CACHE_DIR) + sys.stdout.write("Cache cleared.\n") + else: + sys.stdout.write("Cache directory does not exist.\n") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Manage geographic data cache") + parser.add_argument( + "--download-warsaw", + action="store_true", + help="Download all Warsaw data", + ) + parser.add_argument( + "--download-poland", + action="store_true", + help="Download all Poland data", + ) + parser.add_argument( + "--download-all", + action="store_true", + help="Download all data", + ) + parser.add_argument( + "--clear-cache", + action="store_true", + help="Clear cached data", + ) + + args = parser.parse_args() + + if args.clear_cache: + clear_cache() + elif args.download_warsaw: + download_all_warsaw_data() + elif args.download_poland: + download_all_poland_data() + elif args.download_all: + download_all_warsaw_data() + download_all_poland_data() + else: + parser.print_help() diff --git a/python_pkg/polish_gminy/__init__.py b/python_pkg/polish_gminy/__init__.py new file mode 100644 index 0000000..d4273f1 --- /dev/null +++ b/python_pkg/polish_gminy/__init__.py @@ -0,0 +1 @@ +"""Polish gminy (municipalities) Anki flashcard generator.""" diff --git a/python_pkg/polish_gminy/polish_gminy_anki.py b/python_pkg/polish_gminy/polish_gminy_anki.py new file mode 100755 index 0000000..109d4a7 --- /dev/null +++ b/python_pkg/polish_gminy/polish_gminy_anki.py @@ -0,0 +1,404 @@ +#!/usr/bin/env python3 +"""Anki flashcard generator for Polish gminy (municipalities). + +Generates Anki-compatible flashcard decks with maps showing individual +Polish municipalities highlighted on a country map. + +Uses multiprocessing to parallelize image generation for ~4x speedup. 
+""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +import multiprocessing as mp +from pathlib import Path +import random +import sys +import tempfile +from typing import TYPE_CHECKING + +import genanki +import geopandas as gpd +import matplotlib as mpl + +mpl.use("Agg") # Non-interactive backend for multiprocessing +import matplotlib.pyplot as plt + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from geo_data import get_poland_boundary, get_polish_gminy + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +# 2500 colors for gminy (cycling through) +GMINA_COLORS = [ + "#E74C3C", + "#3498DB", + "#2ECC71", + "#9B59B6", + "#F39C12", + "#1ABC9C", + "#E91E63", + "#00BCD4", + "#8BC34A", + "#FF5722", + "#673AB7", + "#FFEB3B", + "#795548", + "#607D8B", + "#CDDC39", + "#FF9800", + "#4CAF50", + "#03A9F4", + "#F44336", + "#009688", + "#3F51B5", + "#FFC107", + "#9E9E9E", + "#00E676", + "#FF4081", + "#448AFF", + "#69F0AE", + "#FFD740", + "#40C4FF", + "#B388FF", + "#EA80FC", + "#82B1FF", + "#A7FFEB", + "#FFFF8D", + "#FF80AB", + "#536DFE", + "#64FFDA", + "#FFE57F", + "#80D8FF", + "#B9F6CA", + "#CF6679", + "#BB86FC", + "#03DAC6", + "#018786", + "#6200EE", + "#3700B3", + "#B00020", + "#FF0266", + "#C51162", + "#AA00FF", +] + + +def create_gmina_map( + gmina_name: str, + gmina_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, + color_map: dict[str, str], +) -> Figure: + """Create a map showing Poland with one gmina highlighted.""" + fig, ax = plt.subplots(figsize=(10, 12)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Poland as a plain gray shape + poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1) + + # Get pre-computed color + fill_color = color_map.get(gmina_name, GMINA_COLORS[0]) + + # Plot the highlighted gmina + 
def generate_gmina_image_bytes(
    gmina_name: str,
    gmina_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    color_map: dict[str, str],
) -> bytes:
    """Render the map for one gmina and return it as PNG bytes.

    Args:
        gmina_name: Name used to look up the fill colour in ``color_map``.
        gmina_gdf: GeoDataFrame holding the gmina to highlight.
        poland_boundary: GeoDataFrame with the outline of Poland.
        color_map: Mapping of gmina name to fill colour.

    Returns:
        The rendered figure encoded as PNG (150 dpi, tight bounding box).
    """
    fig = create_gmina_map(gmina_name, gmina_gdf, poland_boundary, color_map)

    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly; close
        # it even when saving fails so a failed render does not leak a figure.
        plt.close(fig)

    return buf.getvalue()


def _build_color_map(names: list[str]) -> dict[str, str]:
    """Pre-compute the colour assigned to every name.

    Names are sorted and colours are handed out in that order, wrapping
    around when the palette is exhausted.

    Args:
        names: List of all gmina names.

    Returns:
        Dictionary mapping name to colour. A name that occurs more than once
        keeps the colour of its last position in the sorted list.
    """
    sorted_names = sorted(names)
    return {
        name: GMINA_COLORS[i % len(GMINA_COLORS)] for i, name in enumerate(sorted_names)
    }


# Per-process state for the multiprocessing workers. Populated once per worker
# by _init_worker (passed to the pool as its initializer).
_mp_poland_boundary: gpd.GeoDataFrame | None = None
_mp_color_map: dict[str, str] | None = None


def _init_worker(
    poland_geojson: str,
    color_map: dict[str, str],
) -> None:
    """Initialize a worker process with the data shared by all renders.

    Args:
        poland_geojson: Path of a GeoJSON file with the boundary of Poland;
            it is read once here and kept in a module-level global.
        color_map: Mapping of gmina name to fill colour.
    """
    global _mp_poland_boundary, _mp_color_map  # noqa: PLW0603
    _mp_poland_boundary = gpd.read_file(poland_geojson)
    _mp_color_map = color_map
def generate_anki_package(
    gminy: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Gminy",
) -> genanki.Package:
    """Generate the Anki package for Polish gminy, one card per row.

    Map images are rendered in a pool of worker processes. Each image is
    written to a private temporary directory and registered as a media file of
    the returned package; those files must still exist when the caller writes
    the package to disk, so they are not removed here.

    Args:
        gminy: GeoDataFrame with one row per gmina and a ``name`` column.
        poland_boundary: GeoDataFrame with the outline of Poland.
        deck_name: Name for the Anki deck.

    Returns:
        Package containing the deck and the paths of its media files.
    """
    model_id_hash = hashlib.md5(f"polish_gminy_{deck_name}".encode())  # noqa: S324
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
"""

    my_model = genanki.Model(
        model_id,
        "Polish Gmina Model",
        fields=[
            {"name": "GminaMap"},
            {"name": "GminaName"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{GminaMap}}</div>',
                "afmt": '<div class="map-container">{{GminaMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{GminaName}}</div>',
            },
        ],
        css=card_css,
    )

    # NOTE(review): the deck id is random on every run while the model id is
    # derived from the deck name, so re-importing a regenerated package
    # presumably creates a new deck instead of updating the old one - confirm
    # that this is intended.
    deck_id = random.randrange(1 << 30, 1 << 31)  # noqa: S311
    my_deck = genanki.Deck(deck_id, deck_name)

    # Pre-compute color mapping once (avoids re-sorting the names per card)
    color_map = _build_color_map(gminy["name"].tolist())

    # Work items are (gmina_name, gmina_geojson_str): GeoJSON text is passed
    # to the workers instead of GeoDataFrame objects.
    work_items: list[tuple[str, str]] = []
    for _, row in gminy.iterrows():
        gmina_gdf = gpd.GeoDataFrame([row], crs=gminy.crs)
        work_items.append((row["name"], gmina_gdf.to_json()))

    num_workers = min(mp.cpu_count(), 8)
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    # The boundary is handed to the workers as a file path. Reserve the name
    # and close the handle before writing to it.
    with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f:
        poland_geojson_path = f.name

    images: list[bytes] = []
    try:
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path, color_map),
        ) as pool:
            # imap yields results in input order, so images[i] belongs to
            # work_items[i]. Keying results by name would let rows that share
            # a name overwrite each other.
            for done, (_, image_data) in enumerate(
                pool.imap(_render_single_gmina, work_items), start=1
            ):
                images.append(image_data)
                if done % 100 == 0:
                    sys.stdout.write(f"  Rendered {done}/{len(work_items)}...\n")
    finally:
        # Remove the temp file even when rendering fails.
        Path(poland_geojson_path).unlink(missing_ok=True)

    # A private directory avoids clashes with other runs and with unrelated
    # files in the shared temp directory.
    media_dir = Path(tempfile.mkdtemp(prefix="polish_gminy_"))
    media_files = []

    for idx, ((gmina_name, _), image_data) in enumerate(zip(work_items, images)):
        safe_name = gmina_name.replace(" ", "_").replace("/", "_")
        # The row index keeps file names unique even when names repeat.
        filename = f"gmina_{idx:04d}_{safe_name}.png"

        note = genanki.Note(
            model=my_model,
            # The map field must reference the media file by its base name,
            # otherwise the card shows no image.
            fields=[f'<img src="{filename}">', gmina_name],
            tags=["geography", "poland", "gminy"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Gminy: {num_gminy}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_gminy/run.sh b/python_pkg/polish_gminy/run.sh new file mode 100755 index 0000000..ab83b32 --- /dev/null +++ b/python_pkg/polish_gminy/run.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Script to generate Polish Gminy Anki deck +# WARNING: This will take a long time (~2500 gminy) + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" +PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Polish Gminy Anki Generator ===" +echo "WARNING: This may take a very long time (fetching ~2500 gminy)" +echo + +if [ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." +pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas requests shapely + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m polish_gminy_anki --output polish_gminy.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! 
# Palette of 50 colours; powiaty are assigned colours by cycling through it.
POWIAT_COLORS = [
    "#E74C3C",
    "#3498DB",
    "#2ECC71",
    "#9B59B6",
    "#F39C12",
    "#1ABC9C",
    "#E91E63",
    "#00BCD4",
    "#8BC34A",
    "#FF5722",
    "#673AB7",
    "#FFEB3B",
    "#795548",
    "#607D8B",
    "#CDDC39",
    "#FF9800",
    "#4CAF50",
    "#03A9F4",
    "#F44336",
    "#009688",
    "#3F51B5",
    "#FFC107",
    "#9E9E9E",
    "#00E676",
    "#FF4081",
    "#448AFF",
    "#69F0AE",
    "#FFD740",
    "#40C4FF",
    "#B388FF",
    "#EA80FC",
    "#82B1FF",
    "#A7FFEB",
    "#FFFF8D",
    "#FF80AB",
    "#536DFE",
    "#64FFDA",
    "#FFE57F",
    "#80D8FF",
    "#B9F6CA",
    "#CF6679",
    "#BB86FC",
    "#03DAC6",
    "#018786",
    "#6200EE",
    "#3700B3",
    "#B00020",
    "#FF0266",
    "#C51162",
    "#AA00FF",
]


def create_powiat_map(
    powiat_name: str,
    powiat_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    all_powiaty: gpd.GeoDataFrame,
) -> Figure:
    """Create a map showing Poland with one powiat highlighted.

    Args:
        powiat_name: Name of the powiat; it selects the fill colour.
        powiat_gdf: GeoDataFrame holding the powiat to highlight.
        poland_boundary: GeoDataFrame with the outline of Poland.
        all_powiaty: GeoDataFrame of all powiaty; its ``nazwa`` column
            determines which colour each name receives.

    Returns:
        Matplotlib figure with the map. The caller must close it.

    Raises:
        ValueError: If ``powiat_name`` does not occur in ``all_powiaty``.
    """
    # Resolve the colour before creating the figure, so a bad name cannot
    # leave an open figure behind.
    names = all_powiaty["nazwa"].tolist()
    if powiat_name not in names:
        msg = f"{powiat_name!r} is not in the list of powiaty"
        raise ValueError(msg)
    # The position of a name in the sorted list equals the number of names
    # that sort before it; counting them avoids re-sorting for every card.
    rank = sum(1 for other in names if other < powiat_name)
    fill_color = POWIAT_COLORS[rank % len(POWIAT_COLORS)]

    fig, ax = plt.subplots(figsize=(10, 12))
    ax.set_aspect("equal")
    ax.axis("off")
    fig.patch.set_alpha(0)
    ax.patch.set_alpha(0)

    # Plot Poland as a plain gray shape
    poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
    poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1)

    # Plot the highlighted powiat
    powiat_gdf.plot(ax=ax, color=fill_color, alpha=0.9)
    powiat_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=3)

    # Set bounds to Poland
    bounds = poland_boundary.total_bounds
    ax.set_xlim(bounds[0], bounds[2])
    ax.set_ylim(bounds[1], bounds[3])

    return fig


def generate_powiat_image_bytes(
    powiat_name: str,
    powiat_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    all_powiaty: gpd.GeoDataFrame,
) -> bytes:
    """Render the map for one powiat and return it as PNG bytes.

    Args:
        powiat_name: Name of the powiat to highlight.
        powiat_gdf: GeoDataFrame holding that powiat.
        poland_boundary: GeoDataFrame with the outline of Poland.
        all_powiaty: GeoDataFrame of all powiaty (used for colour assignment).

    Returns:
        The rendered figure encoded as PNG (150 dpi, tight bounding box).
    """
    fig = create_powiat_map(powiat_name, powiat_gdf, poland_boundary, all_powiaty)

    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Close the figure even when saving fails; pyplot keeps figures alive
        # until they are closed explicitly.
        plt.close(fig)

    return buf.getvalue()
+.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +""" + + my_model = genanki.Model( + model_id, + "Polish Powiat Model", + fields=[ + {"name": "PowiatMap"}, + {"name": "PowiatName"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{PowiatMap}}
', + "afmt": '
{{PowiatMap}}
' + '
' + '
{{PowiatName}}
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + for _, row in powiaty.iterrows(): + powiat_name = row["nazwa"] + powiat_gdf = gpd.GeoDataFrame([row], crs=powiaty.crs) + + image_data = generate_powiat_image_bytes( + powiat_name, powiat_gdf, poland_boundary, powiaty + ) + filename = f"powiat_{powiat_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', powiat_name], + tags=["geography", "poland", "powiaty"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish powiaty.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_powiaty.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish Powiaty", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("polish_powiaty.apkg") + + try: + sys.stdout.write("Loading powiaty data...\n") + powiaty = get_polish_powiaty() + poland_boundary = get_poland_boundary() + num_powiaty = len(powiaty) + + sys.stdout.write(f"Generating flashcards for {num_powiaty} powiaty...\n") + + package = generate_anki_package(powiaty, poland_boundary, args.deck_name) + 
package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_powiaty = list(powiaty.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_powiaty)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_powiaty: + powiat_name = row["nazwa"] + powiat_gdf = gpd.GeoDataFrame([row], crs=powiaty.crs) + image_data = generate_powiat_image_bytes( + powiat_name, powiat_gdf, poland_boundary, powiaty + ) + safe_name = powiat_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Powiaty: {num_powiaty}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_powiaty/run.sh b/python_pkg/polish_powiaty/run.sh new file mode 100755 index 0000000..2d5d8a1 --- /dev/null +++ b/python_pkg/polish_powiaty/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Script to generate Polish Powiaty Anki deck + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" +PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Polish Powiaty Anki Generator ===" +echo + +if [ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." 
+pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m polish_powiaty_anki --output polish_powiaty.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! The Anki deck is at: $SCRIPT_DIR/polish_powiaty.apkg" +echo "Preview images are in: $PREVIEW_DIR" diff --git a/python_pkg/preview_all.html b/python_pkg/preview_all.html new file mode 100644 index 0000000..f1347a2 --- /dev/null +++ b/python_pkg/preview_all.html @@ -0,0 +1,82 @@ + + + + + All Preview Images + + + +

🗺️ Anki Geography Cards - Preview Images

+ +

🛣️ Warsaw Streets (Top 5 Longest)

+ + +

🚇 Warsaw Metro Stations

+ + +

🌉 Warsaw Bridges

+ + +

🏛️ Warsaw Landmarks

+ + +

🏘️ Warsaw Osiedla

+ + +

🗺️ Polish Powiaty

+ + +

🏙️ Warsaw Districts

+ + + diff --git a/python_pkg/warsaw_bridges/__init__.py b/python_pkg/warsaw_bridges/__init__.py new file mode 100644 index 0000000..4d27c39 --- /dev/null +++ b/python_pkg/warsaw_bridges/__init__.py @@ -0,0 +1 @@ +"""Warsaw bridges Anki flashcard generator.""" diff --git a/python_pkg/warsaw_bridges/run.sh b/python_pkg/warsaw_bridges/run.sh new file mode 100755 index 0000000..c00a15c --- /dev/null +++ b/python_pkg/warsaw_bridges/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Script to generate Warsaw Bridges Anki deck + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" +PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Warsaw Bridges Anki Generator ===" +echo + +if [ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." +pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas requests shapely + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m warsaw_bridges_anki --output warsaw_bridges.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! The Anki deck is at: $SCRIPT_DIR/warsaw_bridges.apkg" +echo "Preview images are in: $PREVIEW_DIR" diff --git a/python_pkg/warsaw_bridges/warsaw_bridges_anki.py b/python_pkg/warsaw_bridges/warsaw_bridges_anki.py new file mode 100755 index 0000000..883ef70 --- /dev/null +++ b/python_pkg/warsaw_bridges/warsaw_bridges_anki.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python3 +"""Anki flashcard generator for Warsaw bridges over the Vistula. + +Generates Anki-compatible flashcard decks with maps showing individual +Warsaw bridges highlighted on a city map. 
# Bridge color
BRIDGE_COLOR = "#E74C3C"  # Red
RIVER_COLOR = "#3498DB"  # Blue


def load_warsaw_boundary() -> gpd.GeoDataFrame:
    """Load Warsaw boundary from districts GeoJSON.

    The file is expected at ``warsaw_districts/warszawa-dzielnice.geojson``
    next to this package. If it has a feature named "Warszawa", that feature
    is returned; otherwise the union of all features is used.

    Returns:
        GeoDataFrame with Warsaw boundary.

    Raises:
        FileNotFoundError: If boundary data file not found.
    """
    districts_path = (
        Path(__file__).parent.parent / "warsaw_districts" / "warszawa-dzielnice.geojson"
    )
    if not districts_path.exists():
        msg = "Warsaw boundary data not found"
        raise FileNotFoundError(msg)

    warsaw_gdf = gpd.read_file(districts_path)
    warsaw_boundary = warsaw_gdf[warsaw_gdf["name"] == "Warszawa"]
    if len(warsaw_boundary) == 0:
        # No city-wide feature in the file: merge all features into one.
        warsaw_boundary = gpd.GeoDataFrame(
            geometry=[warsaw_gdf.union_all()], crs=warsaw_gdf.crs
        )
    return warsaw_boundary


def create_bridge_map(
    bridge_gdf: gpd.GeoDataFrame,
    warsaw_boundary: gpd.GeoDataFrame,
    vistula: gpd.GeoDataFrame,
) -> Figure:
    """Create a map showing Warsaw with one bridge highlighted.

    Args:
        bridge_gdf: GeoDataFrame with the bridge to highlight.
        warsaw_boundary: GeoDataFrame with Warsaw boundary.
        vistula: GeoDataFrame with Vistula river geometry.

    Returns:
        Matplotlib figure with the map. The caller must close it.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_aspect("equal")
    ax.axis("off")
    fig.patch.set_alpha(0)
    ax.patch.set_alpha(0)

    # Plot Warsaw as a plain gray shape
    warsaw_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
    warsaw_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=2)

    # Plot Vistula river
    vistula.plot(ax=ax, color=RIVER_COLOR, linewidth=3, alpha=0.7)

    # Plot the bridge
    bridge_gdf.plot(ax=ax, color=BRIDGE_COLOR, linewidth=6, alpha=0.9)

    # Set bounds to Warsaw
    bounds = warsaw_boundary.total_bounds
    ax.set_xlim(bounds[0], bounds[2])
    ax.set_ylim(bounds[1], bounds[3])

    return fig


def generate_bridge_image_bytes(
    bridge_gdf: gpd.GeoDataFrame,
    warsaw_boundary: gpd.GeoDataFrame,
    vistula: gpd.GeoDataFrame,
) -> bytes:
    """Generate a bridge map image as bytes.

    Args:
        bridge_gdf: GeoDataFrame with the bridge.
        warsaw_boundary: GeoDataFrame with Warsaw boundary.
        vistula: GeoDataFrame with Vistula river.

    Returns:
        PNG image bytes (150 dpi, tight bounding box).
    """
    fig = create_bridge_map(bridge_gdf, warsaw_boundary, vistula)

    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Close the figure even when saving fails; pyplot keeps figures alive
        # until they are closed explicitly.
        plt.close(fig)

    return buf.getvalue()
+ """ + model_id_hash = hashlib.md5(f"warsaw_bridges_{deck_name}".encode()) # noqa: S324 + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +""" + + my_model = genanki.Model( + model_id, + "Warsaw Bridge Model", + fields=[ + {"name": "BridgeMap"}, + {"name": "BridgeName"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{BridgeMap}}
', + "afmt": '
{{BridgeMap}}
' + '
' + '
{{BridgeName}}
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + for _, row in bridges.iterrows(): + bridge_name = row["name"] + bridge_gdf = gpd.GeoDataFrame([row], crs=bridges.crs) + + image_data = generate_bridge_image_bytes(bridge_gdf, warsaw_boundary, vistula) + filename = f"bridge_{bridge_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', bridge_name], + tags=["geography", "warsaw", "bridges"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point. + + Args: + argv: Command-line arguments. + + Returns: + Exit code. + """ + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Warsaw bridges.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: warsaw_bridges.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Warsaw Bridges", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("warsaw_bridges.apkg") + + try: + sys.stdout.write("Loading bridge data...\n") + bridges = get_warsaw_bridges() + vistula = get_vistula_river() + warsaw_boundary = load_warsaw_boundary() + num_bridges = len(bridges) + + sys.stdout.write(f"Generating flashcards for {num_bridges} bridges...\n") + + package = 
generate_anki_package( + bridges, warsaw_boundary, vistula, args.deck_name + ) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_bridges = list(bridges.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_bridges)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_bridges: + bridge_name = row["name"] + bridge_gdf = gpd.GeoDataFrame([row], crs=bridges.crs) + image_data = generate_bridge_image_bytes( + bridge_gdf, warsaw_boundary, vistula + ) + safe_name = bridge_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Bridges: {num_bridges}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/warsaw_landmarks/__init__.py b/python_pkg/warsaw_landmarks/__init__.py new file mode 100644 index 0000000..67717a7 --- /dev/null +++ b/python_pkg/warsaw_landmarks/__init__.py @@ -0,0 +1 @@ +"""Warsaw landmarks Anki flashcard generator.""" diff --git a/python_pkg/warsaw_landmarks/run.sh b/python_pkg/warsaw_landmarks/run.sh new file mode 100755 index 0000000..d2db000 --- /dev/null +++ b/python_pkg/warsaw_landmarks/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Script to generate Warsaw Landmarks Anki deck + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" 
+PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Warsaw Landmarks Anki Generator ===" +echo + +if [ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." +pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas requests shapely + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m warsaw_landmarks_anki --output warsaw_landmarks.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! The Anki deck is at: $SCRIPT_DIR/warsaw_landmarks.apkg" +echo "Preview images are in: $PREVIEW_DIR" diff --git a/python_pkg/warsaw_landmarks/warsaw_landmarks_anki.py b/python_pkg/warsaw_landmarks/warsaw_landmarks_anki.py new file mode 100755 index 0000000..86978b1 --- /dev/null +++ b/python_pkg/warsaw_landmarks/warsaw_landmarks_anki.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +"""Anki flashcard generator for Warsaw landmarks. + +Generates Anki-compatible flashcard decks with maps showing individual +Warsaw landmarks (monuments, museums, parks, historic sites). 
# Landmark marker color
LANDMARK_COLOR = "#9B59B6"  # Purple


def load_warsaw_boundary() -> gpd.GeoDataFrame:
    """Return the Warsaw outline read from the districts GeoJSON file.

    Returns:
        The feature named "Warszawa" when the file has one, otherwise a
        single-row GeoDataFrame holding the union of all features.

    Raises:
        FileNotFoundError: If the districts file does not exist.
    """
    package_root = Path(__file__).parent.parent
    geojson_file = package_root / "warsaw_districts" / "warszawa-dzielnice.geojson"

    if not geojson_file.exists():
        msg = "Warsaw boundary data not found"
        raise FileNotFoundError(msg)

    districts = gpd.read_file(geojson_file)
    city = districts[districts["name"] == "Warszawa"]
    if len(city) > 0:
        return city

    # No city-wide feature: fall back to the merged district shapes.
    merged = districts.union_all()
    return gpd.GeoDataFrame(geometry=[merged], crs=districts.crs)


def create_landmark_map(
    landmark_gdf: gpd.GeoDataFrame,
    warsaw_boundary: gpd.GeoDataFrame,
) -> Figure:
    """Draw Warsaw in gray with a single landmark marked by a star.

    Args:
        landmark_gdf: GeoDataFrame with the landmark to mark.
        warsaw_boundary: GeoDataFrame with the Warsaw outline; its total
            bounds define the visible extent.

    Returns:
        Matplotlib figure with the map.
    """
    fig, ax = plt.subplots(figsize=(10, 10))

    # Transparent canvas, no axes, undistorted geometry.
    ax.patch.set_alpha(0)
    fig.patch.set_alpha(0)
    ax.axis("off")
    ax.set_aspect("equal")

    # City shape first, outline on top of it.
    warsaw_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
    warsaw_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=2)

    # The landmark itself, drawn as a star marker.
    star_style = {
        "color": LANDMARK_COLOR,
        "markersize": 400,
        "marker": "*",
        "alpha": 0.9,
        "edgecolor": "#1A1A1A",
        "linewidth": 1.5,
    }
    landmark_gdf.plot(ax=ax, **star_style)

    # Always show the whole city, wherever the landmark is.
    min_x, min_y, max_x, max_y = warsaw_boundary.total_bounds
    ax.set_xlim(min_x, max_x)
    ax.set_ylim(min_y, max_y)

    return fig
gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, +) -> bytes: + """Generate a landmark map image as bytes.""" + fig = create_landmark_map(landmark_gdf, warsaw_boundary) + + buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +def generate_anki_package( + landmarks: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, + deck_name: str = "Warsaw Landmarks", +) -> genanki.Package: + """Generate Anki package for Warsaw landmarks.""" + model_id_hash = hashlib.md5(f"warsaw_landmarks_{deck_name}".encode()) # noqa: S324 + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +""" + + my_model = genanki.Model( + model_id, + "Warsaw Landmark Model", + fields=[ + {"name": "LandmarkMap"}, + {"name": "LandmarkName"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{LandmarkMap}}
', + "afmt": '
{{LandmarkMap}}
' + '
' + '
{{LandmarkName}}
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + for _, row in landmarks.iterrows(): + landmark_name = row["name"] + landmark_gdf = gpd.GeoDataFrame([row], crs=landmarks.crs) + + image_data = generate_landmark_image_bytes(landmark_gdf, warsaw_boundary) + filename = f"landmark_{landmark_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', landmark_name], + tags=["geography", "warsaw", "landmarks"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Warsaw landmarks.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: warsaw_landmarks.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Warsaw Landmarks", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("warsaw_landmarks.apkg") + + try: + sys.stdout.write("Loading landmark data...\n") + landmarks = get_warsaw_landmarks() + warsaw_boundary = load_warsaw_boundary() + num_landmarks = len(landmarks) + + sys.stdout.write(f"Generating flashcards for {num_landmarks} landmarks...\n") + + package = generate_anki_package(landmarks, warsaw_boundary, args.deck_name) + 
package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_landmarks = list(landmarks.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_landmarks)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_landmarks: + landmark_name = row["name"] + landmark_gdf = gpd.GeoDataFrame([row], crs=landmarks.crs) + image_data = generate_landmark_image_bytes( + landmark_gdf, warsaw_boundary + ) + safe_name = landmark_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Landmarks: {num_landmarks}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/warsaw_metro/__init__.py b/python_pkg/warsaw_metro/__init__.py new file mode 100644 index 0000000..f08d025 --- /dev/null +++ b/python_pkg/warsaw_metro/__init__.py @@ -0,0 +1 @@ +"""Warsaw metro stations Anki flashcard generator.""" diff --git a/python_pkg/warsaw_metro/run.sh b/python_pkg/warsaw_metro/run.sh new file mode 100755 index 0000000..8b05b6a --- /dev/null +++ b/python_pkg/warsaw_metro/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Script to generate Warsaw Metro Anki deck + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" +PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Warsaw Metro Stations Anki Generator ===" +echo + +if 
[ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." +pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas requests shapely + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m warsaw_metro_anki --output warsaw_metro.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! The Anki deck is at: $SCRIPT_DIR/warsaw_metro.apkg" +echo "Preview images are in: $PREVIEW_DIR" diff --git a/python_pkg/warsaw_metro/warsaw_metro_anki.py b/python_pkg/warsaw_metro/warsaw_metro_anki.py new file mode 100755 index 0000000..c8d7b50 --- /dev/null +++ b/python_pkg/warsaw_metro/warsaw_metro_anki.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +"""Anki flashcard generator for Warsaw metro stations. + +Generates Anki-compatible flashcard decks with maps showing individual +Warsaw metro stations highlighted on a city map. +""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +from pathlib import Path +import random +import sys +from typing import TYPE_CHECKING + +sys.path.insert(0, str(Path(__file__).parent.parent)) +import genanki +from geo_data import get_warsaw_metro_stations +import geopandas as gpd +import matplotlib.pyplot as plt + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +# Station marker color +STATION_COLOR = "#E74C3C" + + +def load_warsaw_boundary() -> gpd.GeoDataFrame: + """Load Warsaw boundary from districts GeoJSON. + + Returns: + GeoDataFrame with Warsaw boundary. 
+ """ + districts_path = ( + Path(__file__).parent.parent / "warsaw_districts" / "warszawa-dzielnice.geojson" + ) + if districts_path.exists(): + warsaw_gdf = gpd.read_file(districts_path) + warsaw_boundary = warsaw_gdf[warsaw_gdf["name"] == "Warszawa"] + if len(warsaw_boundary) == 0: + warsaw_boundary = gpd.GeoDataFrame( + geometry=[warsaw_gdf.union_all()], crs=warsaw_gdf.crs + ) + return warsaw_boundary + + msg = "Warsaw boundary data not found" + raise FileNotFoundError(msg) + + +def create_station_map( + station_gdf: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Warsaw with one metro station highlighted. + + Args: + station_gdf: GeoDataFrame with the station point. + warsaw_boundary: GeoDataFrame with Warsaw boundary. + + Returns: + A matplotlib Figure object. + """ + fig, ax = plt.subplots(figsize=(10, 10)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Warsaw as a plain gray shape + warsaw_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + warsaw_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=2) + + # Plot the station as a large dot + station_gdf.plot( + ax=ax, + color=STATION_COLOR, + markersize=300, + marker="o", + alpha=0.9, + edgecolor="#1A1A1A", + linewidth=2, + ) + + # Set bounds to Warsaw + bounds = warsaw_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_station_image_bytes( + station_gdf: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, +) -> bytes: + """Generate a station map image as bytes.""" + fig = create_station_map(station_gdf, warsaw_boundary) + + buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +def generate_anki_package( + stations: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, + deck_name: str = "Warsaw Metro Stations", +) -> genanki.Package: + 
"""Generate Anki package for Warsaw metro stations.""" + model_id_hash = hashlib.md5(f"warsaw_metro_{deck_name}".encode()) # noqa: S324 + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.line-info { + font-size: 24px; + margin-top: 10px; + color: #666; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +.card.night_mode .line-info { + color: #AAA; +} +""" + + my_model = genanki.Model( + model_id, + "Warsaw Metro Model", + fields=[ + {"name": "StationMap"}, + {"name": "StationName"}, + {"name": "Line"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{StationMap}}
', + "afmt": '
{{StationMap}}
' + '
' + '
{{StationName}}
' + '
{{Line}}
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + for _, row in stations.iterrows(): + station_name = row["name"] + line = row.get("line", "") + station_gdf = gpd.GeoDataFrame([row], crs=stations.crs) + + image_data = generate_station_image_bytes(station_gdf, warsaw_boundary) + filename = f"metro_{station_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', station_name, line], + tags=["geography", "warsaw", "metro"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Warsaw metro stations.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: warsaw_metro.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Warsaw Metro Stations", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("warsaw_metro.apkg") + + try: + sys.stdout.write("Loading metro station data...\n") + stations = get_warsaw_metro_stations() + warsaw_boundary = load_warsaw_boundary() + num_stations = len(stations) + + sys.stdout.write( + f"Generating flashcards for {num_stations} metro stations...\n" + ) + + package = generate_anki_package(stations, 
warsaw_boundary, args.deck_name) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_stations = list(stations.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_stations)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_stations: + station_name = row["name"] + station_gdf = gpd.GeoDataFrame([row], crs=stations.crs) + image_data = generate_station_image_bytes(station_gdf, warsaw_boundary) + safe_name = station_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Stations: {num_stations}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/warsaw_osiedla/__init__.py b/python_pkg/warsaw_osiedla/__init__.py new file mode 100644 index 0000000..f355d33 --- /dev/null +++ b/python_pkg/warsaw_osiedla/__init__.py @@ -0,0 +1 @@ +"""Warsaw osiedla (neighborhoods) Anki flashcard generator.""" diff --git a/python_pkg/warsaw_osiedla/run.sh b/python_pkg/warsaw_osiedla/run.sh new file mode 100755 index 0000000..17bc8ba --- /dev/null +++ b/python_pkg/warsaw_osiedla/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Script to generate Warsaw Osiedla Anki deck + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" +PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Warsaw 
Osiedla Anki Generator ===" +echo + +if [ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." +pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas requests shapely + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m warsaw_osiedla_anki --output warsaw_osiedla.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! The Anki deck is at: $SCRIPT_DIR/warsaw_osiedla.apkg" +echo "Preview images are in: $PREVIEW_DIR" diff --git a/python_pkg/warsaw_osiedla/warsaw_osiedla_anki.py b/python_pkg/warsaw_osiedla/warsaw_osiedla_anki.py new file mode 100755 index 0000000..5a3ee77 --- /dev/null +++ b/python_pkg/warsaw_osiedla/warsaw_osiedla_anki.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 +"""Anki flashcard generator for Warsaw osiedla (neighborhoods). + +Generates Anki-compatible flashcard decks with maps showing individual +Warsaw neighborhoods highlighted on a city map. 
+""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +from pathlib import Path +import random +import sys +from typing import TYPE_CHECKING + +sys.path.insert(0, str(Path(__file__).parent.parent)) +import genanki +from geo_data import get_warsaw_osiedla +import geopandas as gpd +import matplotlib.pyplot as plt + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +# 50 unique colors for neighborhoods +OSIEDLE_COLORS = [ + "#E74C3C", + "#3498DB", + "#2ECC71", + "#9B59B6", + "#F39C12", + "#1ABC9C", + "#E91E63", + "#00BCD4", + "#8BC34A", + "#FF5722", + "#673AB7", + "#FFEB3B", + "#795548", + "#607D8B", + "#CDDC39", + "#FF9800", + "#4CAF50", + "#03A9F4", + "#F44336", + "#009688", + "#3F51B5", + "#FFC107", + "#9E9E9E", + "#00E676", + "#FF4081", + "#448AFF", + "#69F0AE", + "#FFD740", + "#40C4FF", + "#B388FF", + "#EA80FC", + "#82B1FF", + "#A7FFEB", + "#FFFF8D", + "#FF80AB", + "#536DFE", + "#64FFDA", + "#FFE57F", + "#80D8FF", + "#B9F6CA", + "#CF6679", + "#BB86FC", + "#03DAC6", + "#018786", + "#6200EE", + "#3700B3", + "#B00020", + "#FF0266", + "#C51162", + "#AA00FF", +] + + +def load_warsaw_boundary() -> gpd.GeoDataFrame: + """Load Warsaw boundary from districts GeoJSON.""" + districts_path = ( + Path(__file__).parent.parent / "warsaw_districts" / "warszawa-dzielnice.geojson" + ) + if districts_path.exists(): + warsaw_gdf = gpd.read_file(districts_path) + warsaw_boundary = warsaw_gdf[warsaw_gdf["name"] == "Warszawa"] + if len(warsaw_boundary) == 0: + warsaw_boundary = gpd.GeoDataFrame( + geometry=[warsaw_gdf.union_all()], crs=warsaw_gdf.crs + ) + return warsaw_boundary + + msg = "Warsaw boundary data not found" + raise FileNotFoundError(msg) + + +def create_osiedle_map( + osiedle_name: str, + osiedle_gdf: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, + all_osiedla: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Warsaw with one osiedle highlighted.""" + fig, 
ax = plt.subplots(figsize=(10, 10)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Warsaw as a plain gray shape + warsaw_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + warsaw_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=2) + + # Assign color based on sorted names + sorted_names = sorted(all_osiedla["name"].tolist()) + color_idx = sorted_names.index(osiedle_name) % len(OSIEDLE_COLORS) + fill_color = OSIEDLE_COLORS[color_idx] + + # Plot the highlighted osiedle + osiedle_gdf.plot(ax=ax, color=fill_color, alpha=0.9) + osiedle_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=4) + + # Set bounds to Warsaw + bounds = warsaw_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_osiedle_image_bytes( + osiedle_name: str, + osiedle_gdf: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, + all_osiedla: gpd.GeoDataFrame, +) -> bytes: + """Generate an osiedle map image as bytes.""" + fig = create_osiedle_map(osiedle_name, osiedle_gdf, warsaw_boundary, all_osiedla) + + buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +def generate_anki_package( + osiedla: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, + deck_name: str = "Warsaw Osiedla", +) -> genanki.Package: + """Generate Anki package for Warsaw osiedla.""" + model_id_hash = hashlib.md5(f"warsaw_osiedla_{deck_name}".encode()) # noqa: S324 + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: 
contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +""" + + my_model = genanki.Model( + model_id, + "Warsaw Osiedle Model", + fields=[ + {"name": "OsiedleMap"}, + {"name": "OsiedleName"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{OsiedleMap}}
', + "afmt": '
{{OsiedleMap}}
' + '
' + '
{{OsiedleName}}
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + for _, row in osiedla.iterrows(): + osiedle_name = row["name"] + osiedle_gdf = gpd.GeoDataFrame([row], crs=osiedla.crs) + + image_data = generate_osiedle_image_bytes( + osiedle_name, osiedle_gdf, warsaw_boundary, osiedla + ) + filename = f"osiedle_{osiedle_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', osiedle_name], + tags=["geography", "warsaw", "osiedla"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Warsaw osiedla.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: warsaw_osiedla.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Warsaw Osiedla", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("warsaw_osiedla.apkg") + + try: + sys.stdout.write("Loading osiedla data...\n") + osiedla = get_warsaw_osiedla() + warsaw_boundary = load_warsaw_boundary() + num_osiedla = len(osiedla) + + sys.stdout.write(f"Generating flashcards for {num_osiedla} osiedla...\n") + + package = generate_anki_package(osiedla, warsaw_boundary, args.deck_name) + 
package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_osiedla = list(osiedla.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_osiedla)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_osiedla: + osiedle_name = row["name"] + osiedle_gdf = gpd.GeoDataFrame([row], crs=osiedla.crs) + image_data = generate_osiedle_image_bytes( + osiedle_name, osiedle_gdf, warsaw_boundary, osiedla + ) + safe_name = osiedle_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Osiedla: {num_osiedla}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/warsaw_streets/README.md b/python_pkg/warsaw_streets/README.md new file mode 100644 index 0000000..f6ac8ec --- /dev/null +++ b/python_pkg/warsaw_streets/README.md @@ -0,0 +1,37 @@ +# Warsaw Streets Anki Generator + +Generate Anki flashcards for learning major Warsaw streets. + +## Features + +- Generates flashcards for major Warsaw streets (primary, secondary, tertiary roads) +- Uses real street data from OpenStreetMap +- Front of card: Map showing Warsaw with the street highlighted +- Back of card: Street name in Polish +- Self-contained .apkg file with embedded images + +## Data Source + +Street data is fetched from OpenStreetMap via the Overpass API. 
+ +## Installation + +```bash +pip install matplotlib genanki geopandas requests shapely +``` + +## Usage + +```bash +# Generate flashcards (fetches data from OSM) +./run.sh + +# Or run directly +python -m warsaw_streets_anki +``` + +## Notes + +- Only includes named streets tagged as primary, secondary, or tertiary highways +- Streets are filtered to remove duplicates and very short segments +- The first run will download data from Overpass API (may take a minute) diff --git a/python_pkg/warsaw_streets/__init__.py b/python_pkg/warsaw_streets/__init__.py new file mode 100644 index 0000000..d71b26e --- /dev/null +++ b/python_pkg/warsaw_streets/__init__.py @@ -0,0 +1 @@ +"""Warsaw streets Anki flashcard generator.""" diff --git a/python_pkg/warsaw_streets/run.sh b/python_pkg/warsaw_streets/run.sh new file mode 100755 index 0000000..439958e --- /dev/null +++ b/python_pkg/warsaw_streets/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Script to generate Warsaw Streets Anki deck + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/.venv" +PREVIEW_DIR="$SCRIPT_DIR/preview_images" + +echo "=== Warsaw Streets Anki Generator ===" +echo + +if [ ! -d "$VENV_DIR" ]; then + echo "Creating virtual environment..." + python3 -m venv "$VENV_DIR" +fi + +echo "Activating virtual environment..." +source "$VENV_DIR/bin/activate" + +echo "Installing dependencies..." +pip install --quiet --upgrade pip +pip install --quiet matplotlib genanki geopandas requests shapely + +cd "$SCRIPT_DIR" + +# Create preview images directory +mkdir -p "$PREVIEW_DIR" + +python -m warsaw_streets_anki --output warsaw_streets.apkg --preview "$PREVIEW_DIR" --preview-count 5 + +echo +echo "Done! 
The Anki deck is at: $SCRIPT_DIR/warsaw_streets.apkg" +echo "Preview images are in: $PREVIEW_DIR" diff --git a/python_pkg/warsaw_streets/warsaw_streets_anki.py b/python_pkg/warsaw_streets/warsaw_streets_anki.py new file mode 100755 index 0000000..1ddd61a --- /dev/null +++ b/python_pkg/warsaw_streets/warsaw_streets_anki.py @@ -0,0 +1,356 @@ +#!/usr/bin/env python3 +"""Anki flashcard generator for Warsaw streets. + +Generates Anki-compatible flashcard decks with maps showing individual +Warsaw streets highlighted on a city map. + +Usage: + python -m python_pkg.warsaw_streets.warsaw_streets_anki + +Output: + Creates a self-contained .apkg file that can be directly imported into Anki. +""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +from pathlib import Path +import random +import sys +from typing import TYPE_CHECKING, Any + +sys.path.insert(0, str(Path(__file__).parent.parent)) +import genanki +from geo_data import get_warsaw_streets +import geopandas as gpd +import matplotlib.pyplot as plt +from shapely.geometry import MultiLineString + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +# Minimum street length in meters to include +MIN_STREET_LENGTH = 500 + + +def get_unique_streets( + gdf: gpd.GeoDataFrame, +) -> list[tuple[str, gpd.GeoDataFrame, float]]: + """Group street segments by name and merge geometries. + + Args: + gdf: GeoDataFrame with street segments. + + Returns: + List of (name, GeoDataFrame, length_m) tuples, sorted by length (longest first). 
+ """ + # Group by street name + streets: dict[str, list[Any]] = {} + for _, row in gdf.iterrows(): + name = row["name"] + if name and name != "Unknown": + if name not in streets: + streets[name] = [] + streets[name].append(row.geometry) + + # Merge geometries and calculate length + result = [] + for name, geometries in streets.items(): + merged = geometries[0] if len(geometries) == 1 else MultiLineString(geometries) + + # Create a GeoDataFrame for this street + street_gdf = gpd.GeoDataFrame( + [{"name": name, "geometry": merged}], crs="EPSG:4326" + ) + + # Calculate length in meters (approximate) + street_gdf_proj = street_gdf.to_crs("EPSG:2180") # Polish coordinate system + length = street_gdf_proj.geometry.length.iloc[0] + + if length >= MIN_STREET_LENGTH: + result.append((name, street_gdf, length)) + + # Sort by length (longest first) + result.sort(key=lambda x: x[2], reverse=True) + return result + + +def load_street_data() -> ( + tuple[list[tuple[str, gpd.GeoDataFrame, float]], gpd.GeoDataFrame] +): + """Load Warsaw streets and boundary. + + Returns: + Tuple of (streets list sorted by length, warsaw boundary GeoDataFrame). 
+ """ + streets_gdf = get_warsaw_streets(min_length=MIN_STREET_LENGTH) + streets = get_unique_streets(streets_gdf) + + # Load Warsaw districts for boundary (reuse from warsaw_districts) + districts_path = ( + Path(__file__).parent.parent / "warsaw_districts" / "warszawa-dzielnice.geojson" + ) + if districts_path.exists(): + warsaw_gdf = gpd.read_file(districts_path) + # Get just Warsaw boundary + warsaw_boundary = warsaw_gdf[warsaw_gdf["name"] == "Warszawa"] + if len(warsaw_boundary) == 0: + # Dissolve all districts + warsaw_boundary = gpd.GeoDataFrame( + geometry=[warsaw_gdf.union_all()], crs=warsaw_gdf.crs + ) + else: + msg = "Warsaw boundary data not found" + raise FileNotFoundError(msg) + + return streets, warsaw_boundary + + +# Color for highlighted street +STREET_COLOR = "#E74C3C" # Red + + +def create_street_map( + street_gdf: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Warsaw with one street highlighted. + + Args: + street_name: Name of the street. + street_gdf: GeoDataFrame with the street geometry. + warsaw_boundary: GeoDataFrame with Warsaw boundary. + + Returns: + A matplotlib Figure object. + """ + fig, ax = plt.subplots(figsize=(10, 10)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Warsaw as a plain gray shape + warsaw_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + warsaw_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=2) + + # Plot the highlighted street + street_gdf.plot(ax=ax, color=STREET_COLOR, linewidth=4, alpha=0.9) + + # Set bounds to Warsaw + bounds = warsaw_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_street_image_bytes( + street_gdf: gpd.GeoDataFrame, + warsaw_boundary: gpd.GeoDataFrame, +) -> bytes: + """Generate a street map image as bytes. + + Args: + street_gdf: GeoDataFrame with the street geometry. 
+ warsaw_boundary: GeoDataFrame with Warsaw boundary. + + Returns: + PNG image data as bytes. + """ + fig = create_street_map(street_gdf, warsaw_boundary) + + buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +def generate_anki_package( + streets: list[tuple[str, gpd.GeoDataFrame, float]], + warsaw_boundary: gpd.GeoDataFrame, + deck_name: str = "Warsaw Streets", +) -> genanki.Package: + """Generate Anki package for Warsaw streets. + + Args: + streets: List of (name, GeoDataFrame, length) tuples, sorted by length. + warsaw_boundary: GeoDataFrame with Warsaw boundary. + deck_name: Name for the Anki deck. + + Returns: + genanki.Package object. + """ + model_id_hash = hashlib.md5(f"warsaw_streets_{deck_name}".encode()) # noqa: S324 + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +""" + + my_model = genanki.Model( + model_id, + "Warsaw Street Model", + fields=[ + {"name": "StreetMap"}, + {"name": "StreetName"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{StreetMap}}
', + "afmt": '
{{StreetMap}}
' + '
' + '
{{StreetName}}
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + # Streets are already sorted by length (longest first) + for street_name, street_gdf, _length in streets: + image_data = generate_street_image_bytes(street_gdf, warsaw_boundary) + filename = f"street_{street_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', street_name], + tags=["geography", "warsaw", "streets"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Warsaw streets.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: warsaw_streets.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Warsaw Streets", + help="Name for the Anki deck", + ) + parser.add_argument( + "--min-length", + "-m", + type=int, + default=MIN_STREET_LENGTH, + help=f"Minimum street length in meters (default: {MIN_STREET_LENGTH})", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("warsaw_streets.apkg") + + try: + sys.stdout.write("Loading street data...\n") + streets, warsaw_boundary = load_street_data() + num_streets = len(streets) + + sys.stdout.write(f"Generating flashcards for {num_streets} Warsaw streets...\n") + + 
package = generate_anki_package(streets, warsaw_boundary, args.deck_name) + package.write_to_file(str(output_path)) + + # Export preview images if requested (top N longest streets) + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_streets = streets[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_streets)} preview images " + f"(longest streets) to {preview_dir}...\n" + ) + for street_name, street_gdf, length_m in preview_streets: + image_data = generate_street_image_bytes(street_gdf, warsaw_boundary) + safe_name = street_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name} ({length_m:.0f}m)\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Streets: {num_streets}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main())