diff --git a/python_pkg/geo_data.py b/python_pkg/geo_data.py index 07cdae0..825d7e1 100644 --- a/python_pkg/geo_data.py +++ b/python_pkg/geo_data.py @@ -21,7 +21,13 @@ from urllib.request import urlopen import geopandas as gpd import requests -from shapely.geometry import LineString, MultiLineString +from shapely.geometry import ( + GeometryCollection, + LineString, + MultiLineString, + MultiPolygon, + Polygon, +) if TYPE_CHECKING: from typing import Any @@ -48,12 +54,54 @@ REQUEST_TIMEOUT = 180 MAX_RETRIES = 3 RETRY_DELAY = 5 +# Data thresholds for filtering +MIN_PEAK_ELEVATION = 300 # meters +MIN_LAKE_AREA_KM2 = 0.5 # km² +MIN_RIVER_LENGTH_KM = 10 # km +MIN_LINE_COORDS = 2 # minimum coordinates for a line +MIN_RING_COORDS = 4 # minimum coordinates for a polygon ring + def _ensure_cache_dir() -> None: """Create cache directory if it doesn't exist.""" CACHE_DIR.mkdir(parents=True, exist_ok=True) +def _extract_polygonal_geometry( + geom: Polygon | MultiPolygon | GeometryCollection, +) -> Polygon | MultiPolygon | None: + """Extract only polygonal geometry from a geometry that may be mixed. + + Some OSM data comes as GeometryCollections containing polygons mixed with + lines. This function extracts only the polygon/multipolygon parts. + + Args: + geom: Input geometry (Polygon, MultiPolygon, or GeometryCollection). + + Returns: + Polygon or MultiPolygon with only the polygonal parts, or None if empty. 
+ """ + if isinstance(geom, Polygon | MultiPolygon): + return geom + + if isinstance(geom, GeometryCollection): + polygons = [g for g in geom.geoms if isinstance(g, Polygon | MultiPolygon)] + if not polygons: + return None + if len(polygons) == 1: + return polygons[0] + # Flatten MultiPolygons and combine all polygons + all_polys = [] + for p in polygons: + if isinstance(p, Polygon): + all_polys.append(p) + elif isinstance(p, MultiPolygon): + all_polys.extend(p.geoms) + return MultiPolygon(all_polys) + + return None + + def _query_wikidata(query: str) -> list[dict[str, Any]]: """Query Wikidata SPARQL endpoint. @@ -761,6 +809,125 @@ def _build_osiedla_geometry( } +def _extract_polygon_from_element( + element: dict[str, Any], +) -> dict[str, Any] | None: + """Extract polygon geometry from an OSM relation or way element. + + Args: + element: OSM element (relation or way). + + Returns: + GeoJSON geometry dict, or None if extraction fails. + """ + if element.get("type") == "relation": + outer_rings, inner_rings = _extract_osiedla_rings(element, MIN_RING_COORDS) + if not outer_rings: + return None + return _build_osiedla_geometry(outer_rings, inner_rings) + + if element.get("type") == "way" and "geometry" in element: + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) < MIN_RING_COORDS: + return None + if coords[0] != coords[-1]: + coords.append(coords[0]) + return {"type": "Polygon", "coordinates": [coords]} + + return None + + +def _extract_line_from_way(element: dict[str, Any]) -> dict[str, Any] | None: + """Extract line geometry from an OSM way element. + + Args: + element: OSM way element. + + Returns: + GeoJSON LineString geometry dict, or None if extraction fails. 
+ """ + if element.get("type") != "way" or "geometry" not in element: + return None + + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) < MIN_LINE_COORDS: + return None + + return {"type": "LineString", "coordinates": coords} + + +def _add_area_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """Add area_km2 column to a GeoDataFrame. + + Args: + gdf: GeoDataFrame with polygon geometries. + + Returns: + GeoDataFrame with area_km2 column added. + """ + if len(gdf) == 0: + return gdf + gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system + gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 + return gdf + + +def _add_length_column(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """Add length_km column to a GeoDataFrame. + + Args: + gdf: GeoDataFrame with line geometries. + + Returns: + GeoDataFrame with length_km column added. + """ + if len(gdf) == 0: + return gdf + gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system + gdf["length_km"] = gdf_proj.geometry.length / 1000 + return gdf + + +def _extract_coastal_geometry( + element: dict[str, Any], + natural_type: str, + line_types: tuple[str, ...], +) -> dict[str, Any] | None: + """Extract geometry from a coastal feature element. + + For cliffs and beaches, returns LineString. For others, returns Polygon. + + Args: + element: OSM element. + natural_type: The natural= tag value. + line_types: Tuple of natural types that should be lines. + + Returns: + GeoJSON geometry dict, or None if extraction fails. 
+ """ + if element.get("type") == "relation": + return _extract_polygon_from_element(element) + + if element.get("type") != "way" or "geometry" not in element: + return None + + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) < MIN_LINE_COORDS: + return None + + # For cliffs and beaches, keep as linestring + if natural_type in line_types: + return {"type": "LineString", "coordinates": coords} + + # Otherwise try to make a polygon + if len(coords) >= MIN_RING_COORDS: + if coords[0] != coords[-1]: + coords.append(coords[0]) + return {"type": "Polygon", "coordinates": [coords]} + + return None + + def get_warsaw_osiedla() -> gpd.GeoDataFrame: """Get Warsaw osiedla (neighborhoods). @@ -868,7 +1035,7 @@ def get_polish_powiaty() -> gpd.GeoDataFrame: def get_polish_gminy() -> gpd.GeoDataFrame: - """Get Polish gminy (municipalities) from OSM. + """Get Polish gminy (municipalities) from OSM, sorted by area descending. Returns: GeoDataFrame with gminy boundaries. @@ -876,7 +1043,10 @@ def get_polish_gminy() -> gpd.GeoDataFrame: cache_path = CACHE_DIR / "polish_gminy.geojson" if cache_path.exists(): - return gpd.read_file(cache_path) + gdf = gpd.read_file(cache_path) + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf sys.stdout.write("Fetching gminy data from OSM (this may take a while)...\n") # Polish gminy are admin_level=7 in OSM @@ -919,7 +1089,12 @@ def get_polish_gminy() -> gpd.GeoDataFrame: cache_path.write_text(json.dumps(geojson)) sys.stdout.write(f"Cached {len(features)} gminy.\n") - return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + # Add area column + gdf = _add_area_column(gdf) + + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) def get_poland_boundary() -> gpd.GeoDataFrame: @@ -945,6 +1120,794 @@ def get_poland_boundary() -> gpd.GeoDataFrame: return 
poland +# ============================================================================= +# Polish Natural Features +# ============================================================================= + + +def get_polish_mountain_peaks() -> gpd.GeoDataFrame: + """Get Polish mountain peaks, sorted by elevation descending. + + Returns: + GeoDataFrame with mountain peak points and elevation. + """ + cache_path = CACHE_DIR / "polish_mountain_peaks.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + return gdf.sort_values("elevation", ascending=False).reset_index(drop=True) + + sys.stdout.write("Fetching mountain peaks data from OSM...\n") + query = """ + [out:json][timeout:120]; + area["ISO3166-1"="PL"]->.pl; + ( + node["natural"="peak"]["name"]["ele"](area.pl); + ); + out; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + if element.get("type") != "node": + continue + + name = element.get("tags", {}).get("name", "") + ele_str = element.get("tags", {}).get("ele", "") + + if not name or not ele_str or name in seen_names: + continue + + with contextlib.suppress(ValueError): + elevation = float(ele_str.replace(",", ".").split()[0]) + if elevation < MIN_PEAK_ELEVATION: + continue + + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name, "elevation": elevation}, + "geometry": { + "type": "Point", + "coordinates": [element["lon"], element["lat"]], + }, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} mountain peaks.\n") + + if not features: + msg = "No mountain peaks found in OSM data" + raise ValueError(msg) + + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + return gdf.sort_values("elevation", ascending=False).reset_index(drop=True) + + +def 
get_polish_mountain_ranges() -> gpd.GeoDataFrame: + """Get Polish mountain ranges, sorted by area descending. + + Returns: + GeoDataFrame with mountain range polygons. + """ + cache_path = CACHE_DIR / "polish_mountain_ranges.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + # Fix invalid geometries from OSM data and extract only polygons + gdf["geometry"] = gdf.geometry.make_valid() + gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) + gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching mountain ranges data from OSM...\n") + query = """ + [out:json][timeout:180]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["natural"="mountain_range"]["name"](area.pl); + way["natural"="mountain_range"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_ring_coords = 4 + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + if element.get("type") == "relation": + outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) + if not outer_rings: + continue + geometry = _build_osiedla_geometry(outer_rings, inner_rings) + elif element.get("type") == "way" and "geometry" in element: + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) < min_ring_coords: + continue + if coords[0] != coords[-1]: + coords.append(coords[0]) + geometry = {"type": "Polygon", "coordinates": [coords]} + else: + continue + + seen_names.add(name) + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + 
sys.stdout.write(f"Cached {len(features)} mountain ranges.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + # Fix invalid geometries from OSM data and extract only polygons + gdf["geometry"] = gdf.geometry.make_valid() + gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) + gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] + + # Calculate area in km² + gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system + gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 + + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + + +def get_polish_national_parks() -> gpd.GeoDataFrame: + """Get all 23 Polish national parks, sorted by area descending. + + Returns: + GeoDataFrame with national park polygons. + """ + cache_path = CACHE_DIR / "polish_national_parks.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching national parks data from OSM...\n") + query = """ + [out:json][timeout:180]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["boundary"="national_park"]["name"](area.pl); + relation["leisure"="nature_reserve"]["name"]["protect_class"="2"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_ring_coords = 4 + + for element in data.get("elements", []): + if element.get("type") != "relation": + continue + + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + # Filter to only include "Park Narodowy" in name + if "Narodowy" not in name and "narodowy" not in name.lower(): + continue + + outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) + if not outer_rings: + continue + + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name}, + "geometry": 
_build_osiedla_geometry(outer_rings, inner_rings), + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} national parks.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + # Calculate area in km² + gdf_proj = gdf.to_crs("EPSG:2180") + gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 + + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + + +def get_polish_forests() -> gpd.GeoDataFrame: + """Get major Polish forests (puszcze), sorted by area descending. + + Returns: + GeoDataFrame with forest polygons. + """ + cache_path = CACHE_DIR / "polish_forests.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching forests data from OSM...\n") + # Query for named forests, especially "Puszcza" type + query = """ + [out:json][timeout:300]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["natural"="wood"]["name"](area.pl); + relation["landuse"="forest"]["name"~"Puszcza|Bory|Las"](area.pl); + way["natural"="wood"]["name"~"Puszcza|Bory"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + forest_keywords = ("Puszcza", "Bory", "Las ", "Lasy ") + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + if not any(keyword in name for keyword in forest_keywords): + continue + + geometry = _extract_polygon_from_element(element) + if geometry is None: + continue + + seen_names.add(name) + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + 
cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} forests.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = _add_area_column(gdf) + + if len(gdf) > 0: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + +def get_polish_lakes() -> gpd.GeoDataFrame: + """Get Polish lakes, sorted by area descending. + + Returns: + GeoDataFrame with lake polygons. + """ + cache_path = CACHE_DIR / "polish_lakes.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching lakes data from OSM...\n") + query = """ + [out:json][timeout:300]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["natural"="water"]["water"="lake"]["name"](area.pl); + way["natural"="water"]["water"="lake"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + geometry = _extract_polygon_from_element(element) + if geometry is None: + continue + + seen_names.add(name) + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} lakes.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = _add_area_column(gdf) + + if len(gdf) > 0: + # Filter to lakes > MIN_LAKE_AREA_KM2 to exclude tiny ponds + gdf = gdf[gdf["area_km2"] > MIN_LAKE_AREA_KM2] + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + + return gdf + + +def 
_extract_river_coords_from_element( + element: dict[str, Any], +) -> list[list[tuple[float, float]]]: + """Extract coordinate lists from a river element. + + Args: + element: OSM element (way or relation). + + Returns: + List of coordinate lists (line segments). + """ + coord_lists: list[list[tuple[float, float]]] = [] + + if element.get("type") == "way" and "geometry" in element: + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) >= MIN_LINE_COORDS: + coord_lists.append(coords) + elif element.get("type") == "relation": + for member in element.get("members", []): + if member.get("type") == "way" and "geometry" in member: + coords = [(p["lon"], p["lat"]) for p in member["geometry"]] + if len(coords) >= MIN_LINE_COORDS: + coord_lists.append(coords) + + return coord_lists + + +def get_polish_rivers() -> gpd.GeoDataFrame: + """Get Polish rivers, sorted by length descending. + + Rivers with the same name but in different locations are kept separate + by using unique IDs from OSM when available. + + Returns: + GeoDataFrame with river linestrings. 
+ """ + cache_path = CACHE_DIR / "polish_rivers.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "length_km" in gdf.columns: + return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching rivers data from OSM...\n") + query = """ + [out:json][timeout:300]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["waterway"="river"]["name"](area.pl); + way["waterway"="river"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + # Group ways by river name AND wikidata ID (or OSM ID for uniqueness) + # This prevents merging different rivers with the same name + rivers_by_key: dict[str, list[list[tuple[float, float]]]] = {} + river_names: dict[str, str] = {} # key -> display name + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name: + continue + + # Use wikidata ID if available, otherwise use element type+id + wikidata = element.get("tags", {}).get("wikidata", "") + if wikidata: + key = f"{name}_{wikidata}" + else: + # Fall back to element ID for grouping related ways + key = f"{name}_{element.get('type')}_{element.get('id')}" + + coord_lists = _extract_river_coords_from_element(element) + if coord_lists: + rivers_by_key.setdefault(key, []).extend(coord_lists) + river_names[key] = name + + features = [] + for key, coord_lists in rivers_by_key.items(): + name = river_names[key] + geometry: dict[str, Any] + if len(coord_lists) == 1: + geometry = {"type": "LineString", "coordinates": coord_lists[0]} + else: + geometry = {"type": "MultiLineString", "coordinates": coord_lists} + + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} rivers.\n") + gdf = 
gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = _add_length_column(gdf) + + if len(gdf) > 0: + gdf = gdf[gdf["length_km"] > MIN_RIVER_LENGTH_KM] + return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) + + return gdf + + +def get_polish_islands() -> gpd.GeoDataFrame: + """Get Polish islands, sorted by area descending. + + Returns: + GeoDataFrame with island polygons. + """ + cache_path = CACHE_DIR / "polish_islands.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching islands data from OSM...\n") + query = """ + [out:json][timeout:180]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["place"="island"]["name"](area.pl); + way["place"="island"]["name"](area.pl); + relation["place"="islet"]["name"](area.pl); + way["place"="islet"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + geometry = _extract_polygon_from_element(element) + if geometry is None: + continue + + seen_names.add(name) + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} islands.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = _add_area_column(gdf) + + if len(gdf) > 0: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + +def get_polish_coastal_features() -> gpd.GeoDataFrame: + """Get Polish coastal features (peninsulas, spits, cliffs), sorted by length. 
+ + Returns: + GeoDataFrame with coastal feature geometries. + """ + cache_path = CACHE_DIR / "polish_coastal_features.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "length_km" in gdf.columns: + return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching coastal features data from OSM...\n") + query = """ + [out:json][timeout:180]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["natural"="peninsula"]["name"](area.pl); + way["natural"="peninsula"]["name"](area.pl); + relation["natural"="spit"]["name"](area.pl); + way["natural"="spit"]["name"](area.pl); + relation["natural"="cliff"]["name"](area.pl); + way["natural"="cliff"]["name"](area.pl); + relation["natural"="coastline"]["name"](area.pl); + way["natural"="beach"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + line_types = ("cliff", "beach", "coastline") + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + natural_type = element.get("tags", {}).get("natural", "") + if not name or name in seen_names: + continue + + geometry = _extract_coastal_geometry(element, natural_type, line_types) + if geometry is None: + continue + + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name, "type": natural_type}, + "geometry": geometry, + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} coastal features.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = _add_length_column(gdf) + + if len(gdf) > 0: + return gdf.sort_values("length_km", ascending=False).reset_index(drop=True) + return gdf + + +def get_polish_unesco_sites() -> gpd.GeoDataFrame: + """Get Polish UNESCO World Heritage Sites, sorted by 
inscription year. + + Returns: + GeoDataFrame with UNESCO site geometries. + """ + cache_path = CACHE_DIR / "polish_unesco_sites.geojson" + + if cache_path.exists(): + return gpd.read_file(cache_path) + + sys.stdout.write("Fetching UNESCO sites data from OSM...\n") + query = """ + [out:json][timeout:180]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["heritage"="world_heritage_site"]["name"](area.pl); + way["heritage"="world_heritage_site"]["name"](area.pl); + node["heritage"="world_heritage_site"]["name"](area.pl); + relation["heritage:operator"="whc"]["name"](area.pl); + way["heritage:operator"="whc"]["name"](area.pl); + node["heritage:operator"="whc"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_ring_coords = 4 + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + if element.get("type") == "node": + geometry = { + "type": "Point", + "coordinates": [element["lon"], element["lat"]], + } + elif element.get("type") == "relation": + outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) + if not outer_rings: + continue + geometry = _build_osiedla_geometry(outer_rings, inner_rings) + elif element.get("type") == "way" and "geometry" in element: + coords = [(p["lon"], p["lat"]) for p in element["geometry"]] + if len(coords) < min_ring_coords: + continue + if coords[0] != coords[-1]: + coords.append(coords[0]) + geometry = {"type": "Polygon", "coordinates": [coords]} + else: + continue + + seen_names.add(name) + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} UNESCO sites.\n") + return gpd.GeoDataFrame.from_features(features, 
crs="EPSG:4326") + + +def get_polish_nature_reserves() -> gpd.GeoDataFrame: + """Get Polish nature reserves, sorted by area descending. + + Returns: + GeoDataFrame with nature reserve polygons. + """ + cache_path = CACHE_DIR / "polish_nature_reserves.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write( + "Fetching nature reserves data from OSM (this may take a while)...\n" + ) + query = """ + [out:json][timeout:600]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["leisure"="nature_reserve"]["name"](area.pl); + way["leisure"="nature_reserve"]["name"](area.pl); + relation["boundary"="protected_area"]["protect_class"="4"]["name"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + + for element in data.get("elements", []): + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + geometry = _extract_polygon_from_element(element) + if geometry is None: + continue + + seen_names.add(name) + features.append( + {"type": "Feature", "properties": {"name": name}, "geometry": geometry} + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} nature reserves.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + gdf = _add_area_column(gdf) + + if len(gdf) > 0: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + +def get_polish_landscape_parks() -> gpd.GeoDataFrame: + """Get Polish landscape parks, sorted by area descending. + + Returns: + GeoDataFrame with landscape park polygons. 
+ """ + cache_path = CACHE_DIR / "polish_landscape_parks.geojson" + + if cache_path.exists(): + gdf = gpd.read_file(cache_path) + # Fix invalid geometries from OSM data and extract only polygons + gdf["geometry"] = gdf.geometry.make_valid() + gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) + # Remove any rows where geometry extraction failed + gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] + if "area_km2" in gdf.columns: + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + return gdf + + sys.stdout.write("Fetching landscape parks data from OSM...\n") + query = """ + [out:json][timeout:300]; + area["ISO3166-1"="PL"]->.pl; + ( + relation["boundary"="protected_area"]["protect_class"="5"]["name"](area.pl); + relation["leisure"="nature_reserve"]["name"~"Park Krajobrazowy"](area.pl); + ); + out geom; + """ + + data = _overpass_query(query) + + features = [] + seen_names: set[str] = set() + min_ring_coords = 4 + + for element in data.get("elements", []): + if element.get("type") != "relation": + continue + + name = element.get("tags", {}).get("name", "") + if not name or name in seen_names: + continue + + outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords) + if not outer_rings: + continue + + seen_names.add(name) + features.append( + { + "type": "Feature", + "properties": {"name": name}, + "geometry": _build_osiedla_geometry(outer_rings, inner_rings), + } + ) + + _ensure_cache_dir() + geojson = {"type": "FeatureCollection", "features": features} + cache_path.write_text(json.dumps(geojson, ensure_ascii=False)) + + sys.stdout.write(f"Cached {len(features)} landscape parks.\n") + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + # Fix invalid geometries from OSM data and extract only polygons + gdf["geometry"] = gdf.geometry.make_valid() + gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry) + # Remove any rows where geometry extraction failed + gdf = 
gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty] + + if len(gdf) > 0: + gdf_proj = gdf.to_crs("EPSG:2180") + gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000 + return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True) + + return gdf + + # ============================================================================= # Utility Functions # ============================================================================= diff --git a/python_pkg/polish_coastal_features/__init__.py b/python_pkg/polish_coastal_features/__init__.py new file mode 100644 index 0000000..e290be8 --- /dev/null +++ b/python_pkg/polish_coastal_features/__init__.py @@ -0,0 +1 @@ +"""Polish coastal features Anki generator.""" diff --git a/python_pkg/polish_coastal_features/polish_coastal_features_anki.py b/python_pkg/polish_coastal_features/polish_coastal_features_anki.py new file mode 100644 index 0000000..f85a41c --- /dev/null +++ b/python_pkg/polish_coastal_features/polish_coastal_features_anki.py @@ -0,0 +1,333 @@ +"""Anki flashcard generator for Polish coastal features. + +Generates Anki-compatible flashcard decks with maps showing coastal features +(peninsulas, cliffs, beaches, etc.) highlighted on a Poland map. 
+""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +import multiprocessing as mp +from pathlib import Path +import random +import sys +import tempfile +from typing import TYPE_CHECKING + +import genanki +import geopandas as gpd +import matplotlib as mpl + +mpl.use("Agg") # Non-interactive backend for multiprocessing +import matplotlib.pyplot as plt +from shapely.geometry import LineString, MultiLineString, MultiPolygon, Polygon + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from geo_data import get_poland_boundary, get_polish_coastal_features + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +FEATURE_COLOR_POLYGON = "#D4AC0D" # Gold for polygon features +FEATURE_COLOR_LINE = "#D4AC0D" # Gold for line features +LINE_WIDTH = 4 + + +def create_coastal_map( + feature_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Poland with one coastal feature highlighted.""" + fig, ax = plt.subplots(figsize=(10, 12)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Poland as a plain gray shape + poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1) + + # Plot the feature based on geometry type + geom = feature_gdf.iloc[0].geometry + if isinstance(geom, Polygon | MultiPolygon): + feature_gdf.plot(ax=ax, color=FEATURE_COLOR_POLYGON, alpha=0.9) + feature_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=3) + elif isinstance(geom, LineString | MultiLineString): + feature_gdf.plot(ax=ax, color=FEATURE_COLOR_LINE, linewidth=LINE_WIDTH) + + # Set bounds to Poland + bounds = poland_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_coastal_image_bytes( + feature_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> 
def generate_coastal_image_bytes(
    feature_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> bytes:
    """Generate a coastal feature map image as PNG bytes.

    Args:
        feature_gdf: GeoDataFrame with the feature to highlight.
        poland_boundary: GeoDataFrame with Poland's boundary.

    Returns:
        The rendered map encoded as PNG.
    """
    fig = create_coastal_map(feature_gdf, poland_boundary)
    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Close even when saving fails so long-lived workers do not
        # accumulate open figures.
        plt.close(fig)
    return buf.getvalue()


# Global variables for multiprocessing (set via initializer)
_mp_poland_boundary: gpd.GeoDataFrame | None = None


def _init_worker(poland_geojson: str) -> None:
    """Initialize worker process with shared data.

    Args:
        poland_geojson: Path of the GeoJSON file holding Poland's boundary.
    """
    global _mp_poland_boundary  # noqa: PLW0603
    _mp_poland_boundary = gpd.read_file(poland_geojson)


def _render_single_feature(args: tuple[str, str]) -> tuple[str, bytes]:
    """Render a single feature image (worker function).

    Args:
        args: Tuple of (feature_name, feature_geojson_str).

    Returns:
        Tuple of (feature_name, image_bytes).

    Raises:
        RuntimeError: If the worker was not set up by ``_init_worker``.
    """
    feature_name, feature_geojson = args
    if _mp_poland_boundary is None:
        # An explicit check instead of ``assert`` so it survives ``python -O``.
        msg = "worker not initialised: _init_worker() has not run"
        raise RuntimeError(msg)

    feature_gdf = gpd.read_file(feature_geojson)
    image_data = generate_coastal_image_bytes(feature_gdf, _mp_poland_boundary)
    return feature_name, image_data


def generate_anki_package(
    features: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Coastal Features",
) -> genanki.Package:
    """Generate Anki package for Polish coastal features.

    One map is rendered per row of ``features`` in a process pool and one
    note is created per row. Images are matched to rows by position, so
    features that share a name keep their own image and media file.

    Args:
        features: GeoDataFrame with a ``name`` column and, optionally, a
            ``type`` column.
        poland_boundary: GeoDataFrame with Poland's boundary.
        deck_name: Name of the deck; also seeds the model and deck ids.

    Returns:
        A package whose ``media_files`` point at PNG files in a fresh
        temporary directory. The files must exist until the package has
        been written, so they are not removed here.
    """
    model_id_hash = hashlib.md5(  # noqa: S324
        f"polish_coastal_features_{deck_name}".encode()
    )
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    # A deck id derived from the name is stable between runs, so re-importing
    # a regenerated package updates the deck instead of duplicating it.
    deck_id_hash = hashlib.md5(  # noqa: S324
        f"polish_coastal_features_deck_{deck_name}".encode()
    )
    deck_id = (1 << 30) + int(deck_id_hash.hexdigest()[:8], 16) % (1 << 30)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the template markup below was reconstructed from the CSS
    # class names above - confirm against the original file.
    my_model = genanki.Model(
        model_id,
        "Polish Coastal Feature Model",
        fields=[
            {"name": "FeatureMap"},
            {"name": "FeatureName"},
            {"name": "FeatureType"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{FeatureMap}}</div>',
                "afmt": '<div class="map-container">{{FeatureMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{FeatureName}}</div>'
                '<div class="info-text">{{FeatureType}}</div>',
            },
        ],
        css=card_css,
    )
    my_deck = genanki.Deck(deck_id, deck_name)

    # Prepare work items: (feature_name, feature_geojson_str)
    rows = [row for _, row in features.iterrows()]
    work_items: list[tuple[str, str]] = [
        (row["name"], gpd.GeoDataFrame([row], crs=features.crs).to_json())
        for row in rows
    ]

    # Use multiprocessing for parallel rendering
    num_workers = max(1, min(mp.cpu_count(), 8, len(work_items)))
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    images: list[bytes] = []
    # The scratch directory is removed on exit, also when rendering fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        poland_geojson_path = str(Path(scratch_dir) / "poland.geojson")
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path,),
        ) as pool:
            # Ordered imap keeps images aligned with ``rows`` by position.
            rendered = pool.imap(_render_single_feature, work_items)
            for done, (_, image_data) in enumerate(rendered, start=1):
                images.append(image_data)
                if done % 10 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")

    # Create notes from results
    media_dir = Path(tempfile.mkdtemp(prefix="polish_coastal_media_"))
    media_files: list[str] = []
    stem_uses: dict[str, int] = {}
    for row, image_data in zip(rows, images, strict=True):
        feature_name = row["name"]
        feature_type = row.get("type", "coastal feature")

        stem = f"coastal_{feature_name.replace(' ', '_').replace('/', '_')}"
        uses = stem_uses.get(stem, 0) + 1
        stem_uses[stem] = uses
        # The first use keeps the plain name; repeats get a numeric suffix.
        filename = f"{stem}.png" if uses == 1 else f"{stem}_{uses}.png"

        note = genanki.Note(
            model=my_model,
            fields=[f'<img src="{filename}">', feature_name, feature_type],
            tags=["geography", "poland", "coastal", "baltic"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
def main(argv: Sequence[str] | None = None) -> int:
    """Main entry point.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 on success, 1 if an OSError, ValueError or RuntimeError occurred.
    """
    parser = argparse.ArgumentParser(
        description="Generate Anki flashcards for Polish coastal features.",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        default=None,
        help="Output file path (default: polish_coastal_features.apkg)",
    )
    parser.add_argument(
        "--deck-name",
        "-d",
        type=str,
        default="Polish Coastal Features",
        help="Name for the Anki deck",
    )
    parser.add_argument(
        "--preview",
        "-p",
        type=str,
        default=None,
        help="Export preview images to specified directory",
    )
    parser.add_argument(
        "--preview-count",
        type=int,
        default=5,
        help="Number of preview images to export (default: 5)",
    )

    args = parser.parse_args(argv)
    output_path = Path(args.output or "polish_coastal_features.apkg")

    try:
        sys.stdout.write("Loading coastal features data...\n")
        features = get_polish_coastal_features()
        poland_boundary = get_poland_boundary()
        num_features = len(features)

        sys.stdout.write(f"Found {num_features} coastal features.\n")
        sys.stdout.write("Generating flashcards...\n")

        package = generate_anki_package(features, poland_boundary, args.deck_name)
        # Allow --output to point into a directory that does not exist yet.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        package.write_to_file(str(output_path))

        # Export preview images if requested
        if args.preview:
            preview_dir = Path(args.preview)
            preview_dir.mkdir(parents=True, exist_ok=True)
            # Clamp at zero: a negative count used to mean "all but the last
            # N". head() also avoids materialising every row.
            preview_features = features.head(max(args.preview_count, 0))
            sys.stdout.write(
                f"Exporting {len(preview_features)} preview images "
                f"to {preview_dir}...\n"
            )
            for _, row in preview_features.iterrows():
                feature_name = row["name"]
                feature_gdf = gpd.GeoDataFrame([row], crs=features.crs)
                image_data = generate_coastal_image_bytes(feature_gdf, poland_boundary)
                safe_name = feature_name.replace(" ", "_").replace("/", "_")
                preview_path = preview_dir / f"{safe_name}.png"
                preview_path.write_bytes(image_data)
                sys.stdout.write(f" Saved: {preview_path.name}\n")

        sys.stdout.write("\n")
        sys.stdout.write("=" * 60 + "\n")
        sys.stdout.write("FLASHCARD GENERATION COMPLETE\n")
        sys.stdout.write("=" * 60 + "\n")
        sys.stdout.write(f"Coastal features: {num_features}\n")
        sys.stdout.write(f"Output file: {output_path.absolute()}\n")
        if args.preview:
            sys.stdout.write(f"Preview images: {args.preview}\n")
    except (OSError, ValueError, RuntimeError) as e:
        sys.stderr.write(f"Error: {e}\n")
        return 1
    else:
        return 0


if __name__ == "__main__":
    sys.exit(main())
FOREST_COLOR = "#1D4E2B"  # Dark green for forests


def create_forest_map(
    forest_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> Figure:
    """Create a map showing Poland with one forest highlighted.

    Args:
        forest_gdf: GeoDataFrame with the forest to highlight.
        poland_boundary: GeoDataFrame with Poland's outline; its total
            bounds define the visible extent of the map.

    Returns:
        The Matplotlib figure. The caller is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 12))
    try:
        ax.set_aspect("equal")
        ax.axis("off")
        fig.patch.set_alpha(0)
        ax.patch.set_alpha(0)

        # Plot Poland as a plain gray shape
        poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
        poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1)

        # Plot the forest
        forest_gdf.plot(ax=ax, color=FOREST_COLOR, alpha=0.9)
        forest_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=1.5)

        # Set bounds to Poland
        min_x, min_y, max_x, max_y = poland_boundary.total_bounds
        ax.set_xlim(min_x, max_x)
        ax.set_ylim(min_y, max_y)
    except Exception:
        # pyplot keeps every open figure alive; release it before re-raising.
        plt.close(fig)
        raise

    return fig


def generate_forest_image_bytes(
    forest_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> bytes:
    """Generate a forest map image as PNG bytes.

    Args:
        forest_gdf: GeoDataFrame with the forest to highlight.
        poland_boundary: GeoDataFrame with Poland's boundary.

    Returns:
        The rendered map encoded as PNG.
    """
    fig = create_forest_map(forest_gdf, poland_boundary)
    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Close even when saving fails so long-lived workers do not
        # accumulate open figures.
        plt.close(fig)
    return buf.getvalue()
# Global variables for multiprocessing (set via initializer)
_mp_poland_boundary: gpd.GeoDataFrame | None = None


def _init_worker(poland_geojson: str) -> None:
    """Initialize worker process with shared data.

    Args:
        poland_geojson: Path of the GeoJSON file holding Poland's boundary.
    """
    global _mp_poland_boundary  # noqa: PLW0603
    _mp_poland_boundary = gpd.read_file(poland_geojson)


def _render_single_forest(args: tuple[str, str]) -> tuple[str, bytes]:
    """Render a single forest image (worker function).

    Args:
        args: Tuple of (forest_name, forest_geojson_str).

    Returns:
        Tuple of (forest_name, image_bytes).

    Raises:
        RuntimeError: If the worker was not set up by ``_init_worker``.
    """
    forest_name, forest_geojson = args
    if _mp_poland_boundary is None:
        # An explicit check instead of ``assert`` so it survives ``python -O``.
        msg = "worker not initialised: _init_worker() has not run"
        raise RuntimeError(msg)

    forest_gdf = gpd.read_file(forest_geojson)
    image_data = generate_forest_image_bytes(forest_gdf, _mp_poland_boundary)
    return forest_name, image_data


def generate_anki_package(
    forests: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Forests (Puszcze)",
) -> genanki.Package:
    """Generate Anki package for Polish forests.

    One map is rendered per row of ``forests`` in a process pool and one
    note is created per row. Images are matched to rows by position, so
    forests that share a name keep their own image and media file.

    Args:
        forests: GeoDataFrame with a ``name`` column and, optionally, an
            ``area_km2`` column.
        poland_boundary: GeoDataFrame with Poland's boundary.
        deck_name: Name of the deck; also seeds the model and deck ids.

    Returns:
        A package whose ``media_files`` point at PNG files in a fresh
        temporary directory. The files must exist until the package has
        been written, so they are not removed here.
    """
    model_id_hash = hashlib.md5(f"polish_forests_{deck_name}".encode())  # noqa: S324
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    # A deck id derived from the name is stable between runs, so re-importing
    # a regenerated package updates the deck instead of duplicating it.
    deck_id_hash = hashlib.md5(  # noqa: S324
        f"polish_forests_deck_{deck_name}".encode()
    )
    deck_id = (1 << 30) + int(deck_id_hash.hexdigest()[:8], 16) % (1 << 30)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the template markup below was reconstructed from the CSS
    # class names above - confirm against the original file.
    my_model = genanki.Model(
        model_id,
        "Polish Forest Model",
        fields=[
            {"name": "ForestMap"},
            {"name": "ForestName"},
            {"name": "Area"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{ForestMap}}</div>',
                "afmt": '<div class="map-container">{{ForestMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{ForestName}}</div>'
                '<div class="info-text">{{Area}} km²</div>',
            },
        ],
        css=card_css,
    )
    my_deck = genanki.Deck(deck_id, deck_name)

    # Prepare work items: (forest_name, forest_geojson_str)
    rows = [row for _, row in forests.iterrows()]
    work_items: list[tuple[str, str]] = [
        (row["name"], gpd.GeoDataFrame([row], crs=forests.crs).to_json())
        for row in rows
    ]

    # Use multiprocessing for parallel rendering
    num_workers = max(1, min(mp.cpu_count(), 8, len(work_items)))
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    images: list[bytes] = []
    # The scratch directory is removed on exit, also when rendering fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        poland_geojson_path = str(Path(scratch_dir) / "poland.geojson")
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path,),
        ) as pool:
            # Ordered imap keeps images aligned with ``rows`` by position.
            rendered = pool.imap(_render_single_forest, work_items)
            for done, (_, image_data) in enumerate(rendered, start=1):
                images.append(image_data)
                if done % 10 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")

    # Create notes from results
    media_dir = Path(tempfile.mkdtemp(prefix="polish_forests_media_"))
    media_files: list[str] = []
    stem_uses: dict[str, int] = {}
    for row, image_data in zip(rows, images, strict=True):
        forest_name = row["name"]
        area_km2 = round(row["area_km2"], 1) if "area_km2" in row else 0

        stem = f"forest_{forest_name.replace(' ', '_').replace('/', '_')}"
        uses = stem_uses.get(stem, 0) + 1
        stem_uses[stem] = uses
        # The first use keeps the plain name; repeats get a numeric suffix.
        filename = f"{stem}.png" if uses == 1 else f"{stem}_{uses}.png"

        note = genanki.Note(
            model=my_model,
            fields=[f'<img src="{filename}">', forest_name, str(area_km2)],
            tags=["geography", "poland", "forests", "puszcza", "nature"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
def main(argv: Sequence[str] | None = None) -> int:
    """Main entry point.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 on success, 1 if an OSError, ValueError or RuntimeError occurred.
    """
    parser = argparse.ArgumentParser(
        description="Generate Anki flashcards for Polish forests.",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        default=None,
        help="Output file path (default: polish_forests.apkg)",
    )
    parser.add_argument(
        "--deck-name",
        "-d",
        type=str,
        default="Polish Forests (Puszcze)",
        help="Name for the Anki deck",
    )
    parser.add_argument(
        "--preview",
        "-p",
        type=str,
        default=None,
        help="Export preview images to specified directory",
    )
    parser.add_argument(
        "--preview-count",
        type=int,
        default=5,
        help="Number of preview images to export (default: 5)",
    )

    args = parser.parse_args(argv)
    output_path = Path(args.output or "polish_forests.apkg")

    try:
        sys.stdout.write("Loading forests data...\n")
        forests = get_polish_forests()
        poland_boundary = get_poland_boundary()
        num_forests = len(forests)

        sys.stdout.write(f"Found {num_forests} forests.\n")
        sys.stdout.write("Generating flashcards...\n")

        package = generate_anki_package(forests, poland_boundary, args.deck_name)
        # Allow --output to point into a directory that does not exist yet.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        package.write_to_file(str(output_path))

        # Export preview images if requested
        if args.preview:
            preview_dir = Path(args.preview)
            preview_dir.mkdir(parents=True, exist_ok=True)
            # Clamp at zero: a negative count used to mean "all but the last
            # N". head() also avoids materialising every row.
            preview_forests = forests.head(max(args.preview_count, 0))
            sys.stdout.write(
                f"Exporting {len(preview_forests)} preview images "
                f"to {preview_dir}...\n"
            )
            for _, row in preview_forests.iterrows():
                forest_name = row["name"]
                forest_gdf = gpd.GeoDataFrame([row], crs=forests.crs)
                image_data = generate_forest_image_bytes(forest_gdf, poland_boundary)
                safe_name = forest_name.replace(" ", "_").replace("/", "_")
                preview_path = preview_dir / f"{safe_name}.png"
                preview_path.write_bytes(image_data)
                sys.stdout.write(f" Saved: {preview_path.name}\n")

        sys.stdout.write("\n")
        sys.stdout.write("=" * 60 + "\n")
        sys.stdout.write("FLASHCARD GENERATION COMPLETE\n")
        sys.stdout.write("=" * 60 + "\n")
        sys.stdout.write(f"Forests: {num_forests}\n")
        sys.stdout.write(f"Output file: {output_path.absolute()}\n")
        if args.preview:
            sys.stdout.write(f"Preview images: {args.preview}\n")
    except (OSError, ValueError, RuntimeError) as e:
        sys.stderr.write(f"Error: {e}\n")
        return 1
    else:
        return 0


if __name__ == "__main__":
    sys.exit(main())
ISLAND_COLOR = "#E67E22"  # Orange for islands
NEIGHBOR_COLOR = "#EAECEE"  # Lighter gray for extended view

# Padding for zoom (in degrees)
ZOOM_PADDING_DEG = 0.2
# Padding of the backdrop drawn behind islands that cross Poland's bounds
BACKDROP_PADDING_DEG = 0.5
# Margin kept around such islands when the map is not zoomed
OVERFLOW_MARGIN_DEG = 0.1


def _island_extends_beyond(
    island_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> bool:
    """Check if island extends beyond Poland's boundaries.

    Only the bounding boxes (``total_bounds``) are compared; touching the
    edge of Poland's box does not count as extending beyond it.

    Args:
        island_gdf: GeoDataFrame with the island.
        poland_boundary: GeoDataFrame with Poland's boundary.

    Returns:
        True if the island's box sticks out of Poland's box on any side.
    """
    p_min_x, p_min_y, p_max_x, p_max_y = poland_boundary.total_bounds
    i_min_x, i_min_y, i_max_x, i_max_y = island_gdf.total_bounds

    # bool() turns the NumPy scalar produced by the comparison into a plain
    # Python bool, as the annotation promises.
    return bool(
        i_min_x < p_min_x
        or i_min_y < p_min_y
        or i_max_x > p_max_x
        or i_max_y > p_max_y
    )


def create_island_map(
    island_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    *,
    zoom: bool,
) -> Figure:
    """Create a map showing Poland with one island highlighted.

    Args:
        island_gdf: GeoDataFrame with the island to highlight.
        poland_boundary: GeoDataFrame with Poland's boundary.
        zoom: If True, zoom to island area for better visibility.

    Returns:
        The Matplotlib figure. The caller is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 12))
    try:
        ax.set_aspect("equal")
        ax.axis("off")
        fig.patch.set_alpha(0)
        ax.patch.set_alpha(0)

        # Both bounding boxes are needed several times; compute them once.
        isl_min_x, isl_min_y, isl_max_x, isl_max_y = island_gdf.total_bounds
        pl_min_x, pl_min_y, pl_max_x, pl_max_y = poland_boundary.total_bounds
        extends_beyond = _island_extends_beyond(island_gdf, poland_boundary)

        if extends_beyond:
            # Draw extended background if island goes beyond Poland
            left = isl_min_x - BACKDROP_PADDING_DEG
            right = isl_max_x + BACKDROP_PADDING_DEG
            bottom = isl_min_y - BACKDROP_PADDING_DEG
            top = isl_max_y + BACKDROP_PADDING_DEG
            ax.fill(
                [left, right, right, left],
                [bottom, bottom, top, top],
                color=NEIGHBOR_COLOR,
                zorder=0,
            )

        # Plot Poland as a plain gray shape
        poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
        poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1)

        # Plot the island with thinner lines
        island_gdf.plot(ax=ax, color=ISLAND_COLOR, alpha=0.9)
        island_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=1.5)

        # Set bounds based on zoom mode and whether island extends beyond
        if zoom:
            # Zoom to island area with padding
            ax.set_xlim(isl_min_x - ZOOM_PADDING_DEG, isl_max_x + ZOOM_PADDING_DEG)
            ax.set_ylim(isl_min_y - ZOOM_PADDING_DEG, isl_max_y + ZOOM_PADDING_DEG)
        elif extends_beyond:
            # Include the full island in view
            ax.set_xlim(
                min(pl_min_x, isl_min_x - OVERFLOW_MARGIN_DEG),
                max(pl_max_x, isl_max_x + OVERFLOW_MARGIN_DEG),
            )
            ax.set_ylim(
                min(pl_min_y, isl_min_y - OVERFLOW_MARGIN_DEG),
                max(pl_max_y, isl_max_y + OVERFLOW_MARGIN_DEG),
            )
        else:
            # Normal Poland bounds
            ax.set_xlim(pl_min_x, pl_max_x)
            ax.set_ylim(pl_min_y, pl_max_y)
    except Exception:
        # pyplot keeps every open figure alive; release it before re-raising.
        plt.close(fig)
        raise

    return fig
def generate_island_image_bytes(
    island_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    *,
    zoom: bool,
) -> bytes:
    """Generate an island map image as PNG bytes.

    Args:
        island_gdf: GeoDataFrame with the island to highlight.
        poland_boundary: GeoDataFrame with Poland's boundary.
        zoom: Passed through to ``create_island_map``.

    Returns:
        The rendered map encoded as PNG.
    """
    fig = create_island_map(island_gdf, poland_boundary, zoom=zoom)
    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Close even when saving fails so long-lived workers do not
        # accumulate open figures.
        plt.close(fig)
    return buf.getvalue()


# Global variables for multiprocessing (set via initializer)
_mp_poland_boundary: gpd.GeoDataFrame | None = None
_mp_zoom_mode: str = "no-zoom"


def _init_worker(poland_geojson: str, zoom_mode: str) -> None:
    """Initialize worker process with shared data.

    Args:
        poland_geojson: Path of the GeoJSON file holding Poland's boundary.
        zoom_mode: ``"zoom"`` to render zoomed maps; any other value
            renders the non-zoomed view.
    """
    global _mp_poland_boundary, _mp_zoom_mode  # noqa: PLW0603
    _mp_poland_boundary = gpd.read_file(poland_geojson)
    _mp_zoom_mode = zoom_mode


def _render_single_island(args: tuple[str, str]) -> tuple[str, bytes]:
    """Render a single island image (worker function).

    Args:
        args: Tuple of (island_name, island_geojson_str).

    Returns:
        Tuple of (island_name, image_bytes).

    Raises:
        RuntimeError: If the worker was not set up by ``_init_worker``.
    """
    island_name, island_geojson = args
    if _mp_poland_boundary is None:
        # An explicit check instead of ``assert`` so it survives ``python -O``.
        msg = "worker not initialised: _init_worker() has not run"
        raise RuntimeError(msg)

    island_gdf = gpd.read_file(island_geojson)
    image_data = generate_island_image_bytes(
        island_gdf, _mp_poland_boundary, zoom=(_mp_zoom_mode == "zoom")
    )
    return island_name, image_data


def generate_anki_package(
    islands: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Islands",
    *,
    zoom: bool = True,
) -> genanki.Package:
    """Generate Anki package for Polish islands.

    One map is rendered per row of ``islands`` in a process pool and one
    note is created per row. Images are matched to rows by position, so
    islands that share a name keep their own image and media file.

    Args:
        islands: GeoDataFrame with a ``name`` column and, optionally, an
            ``area_km2`` column.
        poland_boundary: GeoDataFrame with Poland's boundary.
        deck_name: Name of the deck; also seeds the model and deck ids.
        zoom: If True, every map is zoomed to its island.

    Returns:
        A package whose ``media_files`` point at PNG files in a fresh
        temporary directory. The files must exist until the package has
        been written, so they are not removed here.
    """
    model_id_hash = hashlib.md5(f"polish_islands_{deck_name}".encode())  # noqa: S324
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    # A deck id derived from the name is stable between runs, so re-importing
    # a regenerated package updates the deck instead of duplicating it.
    deck_id_hash = hashlib.md5(  # noqa: S324
        f"polish_islands_deck_{deck_name}".encode()
    )
    deck_id = (1 << 30) + int(deck_id_hash.hexdigest()[:8], 16) % (1 << 30)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the template markup below was reconstructed from the CSS
    # class names above - confirm against the original file.
    my_model = genanki.Model(
        model_id,
        "Polish Island Model",
        fields=[
            {"name": "IslandMap"},
            {"name": "IslandName"},
            {"name": "Area"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{IslandMap}}</div>',
                "afmt": '<div class="map-container">{{IslandMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{IslandName}}</div>'
                '<div class="info-text">{{Area}} km²</div>',
            },
        ],
        css=card_css,
    )
    my_deck = genanki.Deck(deck_id, deck_name)

    # Prepare work items: (island_name, island_geojson_str)
    rows = [row for _, row in islands.iterrows()]
    work_items: list[tuple[str, str]] = [
        (row["name"], gpd.GeoDataFrame([row], crs=islands.crs).to_json())
        for row in rows
    ]

    # Use multiprocessing for parallel rendering
    zoom_mode = "zoom" if zoom else "no-zoom"
    num_workers = max(1, min(mp.cpu_count(), 8, len(work_items)))
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    images: list[bytes] = []
    # The scratch directory is removed on exit, also when rendering fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        poland_geojson_path = str(Path(scratch_dir) / "poland.geojson")
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path, zoom_mode),
        ) as pool:
            # Ordered imap keeps images aligned with ``rows`` by position.
            rendered = pool.imap(_render_single_island, work_items)
            for done, (_, image_data) in enumerate(rendered, start=1):
                images.append(image_data)
                if done % 10 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")

    # Create notes from results
    media_dir = Path(tempfile.mkdtemp(prefix="polish_islands_media_"))
    media_files: list[str] = []
    stem_uses: dict[str, int] = {}
    for row, image_data in zip(rows, images, strict=True):
        island_name = row["name"]
        area_km2 = round(row["area_km2"], 1) if "area_km2" in row else 0

        stem = f"island_{island_name.replace(' ', '_').replace('/', '_')}"
        uses = stem_uses.get(stem, 0) + 1
        stem_uses[stem] = uses
        # The first use keeps the plain name; repeats get a numeric suffix.
        filename = f"{stem}.png" if uses == 1 else f"{stem}_{uses}.png"

        note = genanki.Note(
            model=my_model,
            fields=[f'<img src="{filename}">', island_name, str(area_km2)],
            tags=["geography", "poland", "islands", "coastal"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
def main(argv: Sequence[str] | None = None) -> int:
    """Run the Polish islands deck generator from the command line.

    Args:
        argv: Arguments to parse; ``None`` lets argparse fall back to
            ``sys.argv``.

    Returns:
        0 once the deck (and any requested previews) has been written,
        1 if an OSError, ValueError or RuntimeError was reported on stderr.
    """
    cli = argparse.ArgumentParser(
        description="Generate Anki flashcards for Polish islands.",
    )
    cli.add_argument(
        "--output",
        "-o",
        type=str,
        default=None,
        help="Output file path (default: polish_islands.apkg)",
    )
    cli.add_argument(
        "--deck-name",
        "-d",
        type=str,
        default="Polish Islands",
        help="Name for the Anki deck",
    )
    cli.add_argument(
        "--preview",
        "-p",
        type=str,
        default=None,
        help="Export preview images to specified directory",
    )
    cli.add_argument(
        "--preview-count",
        type=int,
        default=5,
        help="Number of preview images to export (default: 5)",
    )

    opts = cli.parse_args(argv)
    # An empty or missing --output falls back to the default file name.
    output_path = Path(opts.output or "polish_islands.apkg")
    emit = sys.stdout.write

    try:
        emit("Loading islands data...\n")
        islands = get_polish_islands()
        poland_boundary = get_poland_boundary()
        num_islands = len(islands)

        emit(f"Found {num_islands} islands.\n")
        emit("Generating flashcards...\n")

        deck_package = generate_anki_package(islands, poland_boundary, opts.deck_name)
        deck_package.write_to_file(str(output_path))

        # Optionally dump the first few maps as standalone PNG files.
        if opts.preview:
            target_dir = Path(opts.preview)
            target_dir.mkdir(parents=True, exist_ok=True)
            # head(n) selects the same rows as slicing the row list with [:n].
            selected = islands.head(opts.preview_count)
            emit(f"Exporting {len(selected)} preview images to {target_dir}...\n")
            for _, record in selected.iterrows():
                island_name = record["name"]
                single_island = gpd.GeoDataFrame([record], crs=islands.crs)
                png_bytes = generate_island_image_bytes(
                    single_island, poland_boundary, zoom=True
                )
                stem = island_name.replace(" ", "_").replace("/", "_")
                destination = target_dir / f"{stem}.png"
                destination.write_bytes(png_bytes)
                emit(f" Saved: {destination.name}\n")

        rule = "=" * 60 + "\n"
        summary = [
            "\n",
            rule,
            "FLASHCARD GENERATION COMPLETE\n",
            rule,
            f"Islands: {num_islands}\n",
            f"Output file: {output_path.absolute()}\n",
        ]
        if opts.preview:
            summary.append(f"Preview images: {opts.preview}\n")
        emit("".join(summary))
    except (OSError, ValueError, RuntimeError) as err:
        sys.stderr.write(f"Error: {err}\n")
        return 1
    return 0
def create_lake_map(
    lake_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    *,
    zoom: bool = True,
) -> Figure:
    """Draw Poland in gray with a single lake highlighted on top.

    Args:
        lake_gdf: GeoDataFrame holding the lake to highlight.
        poland_boundary: GeoDataFrame holding the outline of Poland.
        zoom: When True the view is limited to the lake's bounding box plus
            ZOOM_PADDING_DEG on every side; otherwise the whole of Poland
            is shown.

    Returns:
        The matplotlib figure (transparent background, axes hidden).
    """
    figure, axes = plt.subplots(figsize=(10, 12))
    axes.set_aspect("equal")
    axes.axis("off")
    # Fully transparent figure and axes backgrounds.
    for patch in (figure.patch, axes.patch):
        patch.set_alpha(0)

    # Base layer: Poland as a flat gray shape with a dark outline.
    poland_boundary.plot(ax=axes, color="#D5D8DC", alpha=0.6)
    poland_boundary.boundary.plot(ax=axes, color="#2C3E50", linewidth=1)

    # Highlight layer: the lake fill plus its outline.
    lake_gdf.plot(ax=axes, color=LAKE_COLOR, alpha=0.9)
    lake_gdf.boundary.plot(ax=axes, color="#1A1A1A", linewidth=1.5)

    if not zoom:
        west, south, east, north = poland_boundary.total_bounds
        axes.set_xlim(west, east)
        axes.set_ylim(south, north)
        return figure

    west, south, east, north = lake_gdf.total_bounds
    axes.set_xlim(west - ZOOM_PADDING_DEG, east + ZOOM_PADDING_DEG)
    axes.set_ylim(south - ZOOM_PADDING_DEG, north + ZOOM_PADDING_DEG)
    return figure
def generate_anki_package(
    lakes: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Lakes",
    *,
    zoom: bool = True,
) -> genanki.Package:
    """Generate Anki package for Polish lakes.

    One note is created per row of ``lakes``. Map images are rendered in a
    multiprocessing pool and written to a fresh temporary directory so they
    can be attached to the package as media files.

    Args:
        lakes: GeoDataFrame with one lake per row; a ``name`` column is
            required and ``area_km2`` is used when present.
        poland_boundary: GeoDataFrame with the outline of Poland.
        deck_name: Name of the generated deck (also seeds the model id).
        zoom: Passed to the workers; True renders zoomed maps.

    Returns:
        A ``genanki.Package`` whose ``media_files`` point at the rendered
        PNG files. The files must still exist when the package is written.
    """
    model_id_hash = hashlib.md5(f"polish_lakes_{deck_name}".encode())  # noqa: S324
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the template markup was missing from the source; the
    # wrappers below are reconstructed from the CSS class names above.
    # Confirm against the intended card layout.
    my_model = genanki.Model(
        model_id,
        "Polish Lake Model",
        fields=[
            {"name": "LakeMap"},
            {"name": "LakeName"},
            {"name": "Area"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{LakeMap}}</div>',
                "afmt": '<div class="map-container">{{LakeMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{LakeName}}</div>'
                '<div class="info-text">{{Area}} km²</div>',
            },
        ],
        css=card_css,
    )

    deck_id = random.randrange(1 << 30, 1 << 31)  # noqa: S311
    my_deck = genanki.Deck(deck_id, deck_name)
    media_files = []

    # Work items are keyed by row position, not by lake name: several lakes
    # may share a name, and a name-keyed result dict would silently keep only
    # one image for all of them. The worker returns the key unchanged.
    work_items: list[tuple[str, str]] = []
    for position, (_, row) in enumerate(lakes.iterrows()):
        lake_gdf = gpd.GeoDataFrame([row], crs=lakes.crs)
        work_items.append((str(position), lake_gdf.to_json()))

    num_workers = min(mp.cpu_count(), 8)
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    # Reserve a path for the boundary file and close the handle before
    # writing, so the file is not opened twice at the same time.
    with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f:
        poland_geojson_path = f.name

    results: dict[str, bytes] = {}
    try:
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path, "zoom" if zoom else "no-zoom"),
        ) as pool:
            for done, (key, image_data) in enumerate(
                pool.imap_unordered(_render_single_lake, work_items), start=1
            ):
                results[key] = image_data
                if done % 50 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")
    finally:
        # Remove the boundary file even when rendering fails.
        Path(poland_geojson_path).unlink(missing_ok=True)

    # A private directory avoids clashes with other runs and does not assume
    # that /tmp exists on the platform.
    media_dir = Path(tempfile.mkdtemp(prefix="polish_lakes_media_"))
    used_filenames: set[str] = set()

    for position, (_, row) in enumerate(lakes.iterrows()):
        lake_name = row["name"]
        area_km2 = round(row["area_km2"], 1) if "area_km2" in row else 0
        image_data = results[str(position)]

        stem = f"lake_{lake_name.replace(' ', '_').replace('/', '_')}"
        filename = f"{stem}.png"
        suffix = 2
        # Disambiguate lakes that share a name so each keeps its own image.
        while filename in used_filenames:
            filename = f"{stem}_{suffix}.png"
            suffix += 1
        used_filenames.add(filename)

        note = genanki.Note(
            model=my_model,
            # The map field references the media file by its bare name.
            fields=[f'<img src="{filename}">', lake_name, str(area_km2)],
            tags=["geography", "poland", "lakes", "water"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
"""Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish lakes.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_lakes.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish Lakes", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + parser.add_argument( + "--no-zoom", + action="store_true", + help="Disable zoom (show entire Poland instead of zoomed region)", + ) + parser.add_argument( + "--limit", + "-l", + type=int, + default=None, + help="Limit number of lakes (for testing)", + ) + + args = parser.parse_args(argv) + output_path = Path(args.output) if args.output else Path("polish_lakes.apkg") + zoom = not args.no_zoom + + try: + sys.stdout.write("Loading lakes data...\n") + lakes = get_polish_lakes() + poland_boundary = get_poland_boundary() + + if args.limit: + lakes = lakes.head(args.limit) + sys.stdout.write(f"Limiting to {args.limit} lakes.\n") + + num_lakes = len(lakes) + + sys.stdout.write(f"Found {num_lakes} lakes.\n") + sys.stdout.write(f"Zoom mode: {'enabled' if zoom else 'disabled'}\n") + sys.stdout.write("Generating flashcards...\n") + + package = generate_anki_package( + lakes, poland_boundary, args.deck_name, zoom=zoom + ) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_lakes = list(lakes.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_lakes)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_lakes: + lake_name = row["name"] + lake_gdf = 
gpd.GeoDataFrame([row], crs=lakes.crs) + image_data = generate_lake_image_bytes( + lake_gdf, poland_boundary, zoom=zoom + ) + safe_name = lake_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Lakes: {num_lakes}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_lakes/run.sh b/python_pkg/polish_lakes/run.sh new file mode 100755 index 0000000..8bfeb08 --- /dev/null +++ b/python_pkg/polish_lakes/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Run the Polish lakes Anki generator + +cd "$(dirname "$0")" || exit + +python polish_lakes_anki.py --preview preview_images --preview-count 5 "$@" diff --git a/python_pkg/polish_landscape_parks/__init__.py b/python_pkg/polish_landscape_parks/__init__.py new file mode 100644 index 0000000..d065b03 --- /dev/null +++ b/python_pkg/polish_landscape_parks/__init__.py @@ -0,0 +1 @@ +"""Polish landscape parks Anki deck generator.""" diff --git a/python_pkg/polish_landscape_parks/polish_landscape_parks_anki.py b/python_pkg/polish_landscape_parks/polish_landscape_parks_anki.py new file mode 100644 index 0000000..325ca2f --- /dev/null +++ b/python_pkg/polish_landscape_parks/polish_landscape_parks_anki.py @@ -0,0 +1,336 @@ +"""Anki flashcard generator for Polish landscape parks. + +Generates Anki-compatible flashcard decks with maps showing landscape parks +highlighted on a Poland map. 
def create_park_map(
    park_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> Figure:
    """Draw Poland in gray with a single landscape park highlighted.

    The park geometry is intersected with the union of the Poland boundary
    before plotting, so nothing is drawn outside the country outline.

    Args:
        park_gdf: GeoDataFrame holding the park to highlight.
        poland_boundary: GeoDataFrame holding the outline of Poland.

    Returns:
        The matplotlib figure (transparent background, axes hidden),
        framed on the bounds of Poland.
    """
    figure, axes = plt.subplots(figsize=(10, 12))
    axes.set_aspect("equal")
    axes.axis("off")
    # Fully transparent figure and axes backgrounds.
    for patch in (figure.patch, axes.patch):
        patch.set_alpha(0)

    # Base layer: Poland as a flat gray shape with a dark outline.
    poland_boundary.plot(ax=axes, color="#D5D8DC", alpha=0.6)
    poland_boundary.boundary.plot(ax=axes, color="#2C3E50", linewidth=1)

    # Work on a copy so the caller's frame keeps its unclipped geometry.
    park_inside_poland = park_gdf.copy()
    country_shape = poland_boundary.union_all()
    park_inside_poland["geometry"] = park_gdf.geometry.intersection(country_shape)

    # Highlight layer: the clipped park fill plus its outline.
    park_inside_poland.plot(ax=axes, color=PARK_COLOR, alpha=0.9)
    park_inside_poland.boundary.plot(ax=axes, color="#1A1A1A", linewidth=1.5)

    # Frame the picture on the whole country.
    west, south, east, north = poland_boundary.total_bounds
    axes.set_xlim(west, east)
    axes.set_ylim(south, north)

    return figure
def generate_anki_package(
    parks: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Landscape Parks",
) -> genanki.Package:
    """Generate Anki package for Polish landscape parks.

    One note is created per row of ``parks``. Map images are rendered in a
    multiprocessing pool and written to a fresh temporary directory so they
    can be attached to the package as media files.

    Args:
        parks: GeoDataFrame with one park per row; a ``name`` column is
            required and ``area_km2`` is used when present.
        poland_boundary: GeoDataFrame with the outline of Poland.
        deck_name: Name of the generated deck (also seeds the model id).

    Returns:
        A ``genanki.Package`` whose ``media_files`` point at the rendered
        PNG files. The files must still exist when the package is written.
    """
    model_id_hash = hashlib.md5(  # noqa: S324
        f"polish_landscape_parks_{deck_name}".encode()
    )
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the template markup was missing from the source; the
    # wrappers below are reconstructed from the CSS class names above.
    # Confirm against the intended card layout.
    my_model = genanki.Model(
        model_id,
        "Polish Landscape Park Model",
        fields=[
            {"name": "ParkMap"},
            {"name": "ParkName"},
            {"name": "Area"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{ParkMap}}</div>',
                "afmt": '<div class="map-container">{{ParkMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{ParkName}}</div>'
                '<div class="info-text">{{Area}} km²</div>',
            },
        ],
        css=card_css,
    )

    deck_id = random.randrange(1 << 30, 1 << 31)  # noqa: S311
    my_deck = genanki.Deck(deck_id, deck_name)
    media_files = []

    # Work items are keyed by row position, not by park name: a name-keyed
    # result dict would silently keep only one image for rows that share a
    # name. The worker returns the key unchanged.
    work_items: list[tuple[str, str]] = []
    for position, (_, row) in enumerate(parks.iterrows()):
        park_gdf = gpd.GeoDataFrame([row], crs=parks.crs)
        work_items.append((str(position), park_gdf.to_json()))

    num_workers = min(mp.cpu_count(), 8)
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    # Reserve a path for the boundary file and close the handle before
    # writing, so the file is not opened twice at the same time.
    with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f:
        poland_geojson_path = f.name

    results: dict[str, bytes] = {}
    try:
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path,),
        ) as pool:
            for done, (key, image_data) in enumerate(
                pool.imap_unordered(_render_single_park, work_items), start=1
            ):
                results[key] = image_data
                if done % 25 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")
    finally:
        # Remove the boundary file even when rendering fails.
        Path(poland_geojson_path).unlink(missing_ok=True)

    # A private directory avoids clashes with other runs and does not assume
    # that /tmp exists on the platform.
    media_dir = Path(tempfile.mkdtemp(prefix="polish_landscape_parks_media_"))
    used_filenames: set[str] = set()

    for position, (_, row) in enumerate(parks.iterrows()):
        park_name = row["name"]
        area_km2 = round(row["area_km2"], 1) if "area_km2" in row else 0
        image_data = results[str(position)]

        stem = f"lpark_{park_name.replace(' ', '_').replace('/', '_')}"
        filename = f"{stem}.png"
        suffix = 2
        # Disambiguate parks that share a name so each keeps its own image.
        while filename in used_filenames:
            filename = f"{stem}_{suffix}.png"
            suffix += 1
        used_filenames.add(filename)

        note = genanki.Note(
            model=my_model,
            # The map field references the media file by its bare name.
            fields=[f'<img src="{filename}">', park_name, str(area_km2)],
            tags=["geography", "poland", "landscape-parks", "nature"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish landscape parks.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_landscape_parks.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish Landscape Parks", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = ( + Path(args.output) if args.output else Path("polish_landscape_parks.apkg") + ) + + try: + sys.stdout.write("Loading landscape parks data...\n") + parks = get_polish_landscape_parks() + poland_boundary = get_poland_boundary() + num_parks = len(parks) + + sys.stdout.write(f"Found {num_parks} landscape parks.\n") + sys.stdout.write("Generating flashcards...\n") + + package = generate_anki_package(parks, poland_boundary, args.deck_name) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_parks = list(parks.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_parks)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_parks: + park_name = row["name"] + park_gdf = gpd.GeoDataFrame([row], crs=parks.crs) + image_data = generate_park_image_bytes(park_gdf, poland_boundary) + safe_name = park_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") 
+ sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Landscape parks: {num_parks}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_landscape_parks/run.sh b/python_pkg/polish_landscape_parks/run.sh new file mode 100755 index 0000000..7867103 --- /dev/null +++ b/python_pkg/polish_landscape_parks/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Run the Polish landscape parks Anki generator + +cd "$(dirname "$0")" || exit + +python polish_landscape_parks_anki.py --preview preview_images --preview-count 5 "$@" diff --git a/python_pkg/polish_mountain_peaks/__init__.py b/python_pkg/polish_mountain_peaks/__init__.py new file mode 100644 index 0000000..ec74a73 --- /dev/null +++ b/python_pkg/polish_mountain_peaks/__init__.py @@ -0,0 +1 @@ +"""Polish mountain peaks Anki deck generator.""" diff --git a/python_pkg/polish_mountain_peaks/polish_mountain_peaks_anki.py b/python_pkg/polish_mountain_peaks/polish_mountain_peaks_anki.py new file mode 100644 index 0000000..c466cf0 --- /dev/null +++ b/python_pkg/polish_mountain_peaks/polish_mountain_peaks_anki.py @@ -0,0 +1,378 @@ +"""Anki flashcard generator for Polish mountain peaks. + +Generates Anki-compatible flashcard decks with ZOOMED maps showing mountain peaks +highlighted on a regional map for better visibility. 
def create_peak_map(
    peak_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    *,
    zoom: bool = True,
) -> Figure:
    """Draw Poland in gray with a single peak marked by a red triangle.

    Args:
        peak_gdf: GeoDataFrame with the peak point.
        poland_boundary: GeoDataFrame with Poland boundary.
        zoom: If True, the view is a window of ZOOM_PADDING_DEG degrees
            around the peak; if False, the whole of Poland is shown.

    Returns:
        The matplotlib figure (transparent background, axes hidden).
    """
    figure, axes = plt.subplots(figsize=(10, 12))
    axes.set_aspect("equal")
    axes.axis("off")
    # Fully transparent figure and axes backgrounds.
    for patch in (figure.patch, axes.patch):
        patch.set_alpha(0)

    # Base layer: Poland as a flat gray shape with a dark outline.
    poland_boundary.plot(ax=axes, color="#D5D8DC", alpha=0.6)
    poland_boundary.boundary.plot(ax=axes, color="#2C3E50", linewidth=1)

    # The marker is drawn larger in the zoomed view.
    if zoom:
        marker_size = 400
    else:
        marker_size = 200
    peak_gdf.plot(
        ax=axes,
        color=MARKER_COLOR,
        markersize=marker_size,
        marker="^",  # Triangle for mountain
        edgecolor="#1A1A1A",
        linewidth=2,
        zorder=5,
    )

    if not zoom:
        west, south, east, north = poland_boundary.total_bounds
        axes.set_xlim(west, east)
        axes.set_ylim(south, north)
        return figure

    # Centre the window on the first row's geometry, read through .x / .y.
    summit = peak_gdf.iloc[0].geometry
    centre_x, centre_y = summit.x, summit.y
    axes.set_xlim(centre_x - ZOOM_PADDING_DEG, centre_x + ZOOM_PADDING_DEG)
    axes.set_ylim(centre_y - ZOOM_PADDING_DEG, centre_y + ZOOM_PADDING_DEG)
    return figure
def generate_anki_package(
    peaks: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Mountain Peaks",
    *,
    zoom: bool = True,
) -> genanki.Package:
    """Generate Anki package for Polish mountain peaks.

    One note is created per row of ``peaks``. Map images are rendered in a
    multiprocessing pool and written to a fresh temporary directory so they
    can be attached to the package as media files.

    Args:
        peaks: GeoDataFrame with one peak per row; ``name`` and
            ``elevation`` columns are required.
        poland_boundary: GeoDataFrame with the outline of Poland.
        deck_name: Name of the generated deck (also seeds the model id).
        zoom: Passed to the workers; True renders zoomed maps.

    Returns:
        A ``genanki.Package`` whose ``media_files`` point at the rendered
        PNG files. The files must still exist when the package is written.
    """
    model_id_hash = hashlib.md5(  # noqa: S324
        f"polish_mountain_peaks_{deck_name}".encode()
    )
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the template markup was missing from the source; the
    # wrappers below are reconstructed from the CSS class names above.
    # Confirm against the intended card layout.
    my_model = genanki.Model(
        model_id,
        "Polish Mountain Peak Model",
        fields=[
            {"name": "PeakMap"},
            {"name": "PeakName"},
            {"name": "Elevation"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{PeakMap}}</div>',
                "afmt": '<div class="map-container">{{PeakMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{PeakName}}</div>'
                '<div class="info-text">{{Elevation}} m n.p.m.</div>',
            },
        ],
        css=card_css,
    )

    deck_id = random.randrange(1 << 30, 1 << 31)  # noqa: S311
    my_deck = genanki.Deck(deck_id, deck_name)
    media_files = []

    # Work items are keyed by row position, not by peak name: a name-keyed
    # result dict would silently keep only one image for peaks that share a
    # name. The worker returns the key unchanged.
    work_items: list[tuple[str, str]] = []
    for position, (_, row) in enumerate(peaks.iterrows()):
        peak_gdf = gpd.GeoDataFrame([row], crs=peaks.crs)
        work_items.append((str(position), peak_gdf.to_json()))

    num_workers = min(mp.cpu_count(), 8)
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    # Reserve a path for the boundary file and close the handle before
    # writing, so the file is not opened twice at the same time.
    with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f:
        poland_geojson_path = f.name

    results: dict[str, bytes] = {}
    try:
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path, "zoom" if zoom else "no-zoom"),
        ) as pool:
            for done, (key, image_data) in enumerate(
                pool.imap_unordered(_render_single_peak, work_items), start=1
            ):
                results[key] = image_data
                if done % 50 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")
    finally:
        # Remove the boundary file even when rendering fails.
        Path(poland_geojson_path).unlink(missing_ok=True)

    # A private directory avoids clashes with other runs and does not assume
    # that /tmp exists on the platform.
    media_dir = Path(tempfile.mkdtemp(prefix="polish_mountain_peaks_media_"))
    used_filenames: set[str] = set()

    for position, (_, row) in enumerate(peaks.iterrows()):
        peak_name = row["name"]
        elevation = int(row["elevation"])
        image_data = results[str(position)]

        stem = f"peak_{peak_name.replace(' ', '_').replace('/', '_')}"
        filename = f"{stem}.png"
        suffix = 2
        # Disambiguate peaks that share a name so each keeps its own image.
        while filename in used_filenames:
            filename = f"{stem}_{suffix}.png"
            suffix += 1
        used_filenames.add(filename)

        note = genanki.Note(
            model=my_model,
            # The map field references the media file by its bare name.
            fields=[f'<img src="{filename}">', peak_name, str(elevation)],
            tags=["geography", "poland", "mountains", "peaks"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish mountain peaks.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_mountain_peaks.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish Mountain Peaks", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + parser.add_argument( + "--no-zoom", + action="store_true", + help="Disable zoom (show entire Poland instead of zoomed region)", + ) + parser.add_argument( + "--limit", + "-l", + type=int, + default=None, + help="Limit number of peaks (for testing)", + ) + + args = parser.parse_args(argv) + output_path = ( + Path(args.output) if args.output else Path("polish_mountain_peaks.apkg") + ) + zoom = not args.no_zoom + + try: + sys.stdout.write("Loading mountain peaks data...\n") + peaks = get_polish_mountain_peaks() + poland_boundary = get_poland_boundary() + + if args.limit: + peaks = peaks.head(args.limit) + sys.stdout.write(f"Limiting to {args.limit} peaks.\n") + + num_peaks = len(peaks) + + sys.stdout.write(f"Found {num_peaks} mountain peaks.\n") + sys.stdout.write(f"Zoom mode: {'enabled' if zoom else 'disabled'}\n") + sys.stdout.write("Generating flashcards...\n") + + package = generate_anki_package( + peaks, poland_boundary, args.deck_name, zoom=zoom + ) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_peaks = list(peaks.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_peaks)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_peaks: + 
peak_name = row["name"] + peak_gdf = gpd.GeoDataFrame([row], crs=peaks.crs) + image_data = generate_peak_image_bytes( + peak_gdf, poland_boundary, zoom=zoom + ) + safe_name = peak_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Mountain peaks: {num_peaks}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_mountain_peaks/run.sh b/python_pkg/polish_mountain_peaks/run.sh new file mode 100755 index 0000000..cc431fd --- /dev/null +++ b/python_pkg/polish_mountain_peaks/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Run the Polish mountain peaks Anki generator + +cd "$(dirname "$0")" || exit + +python polish_mountain_peaks_anki.py --preview preview_images --preview-count 5 "$@" diff --git a/python_pkg/polish_mountain_ranges/__init__.py b/python_pkg/polish_mountain_ranges/__init__.py new file mode 100644 index 0000000..1c52609 --- /dev/null +++ b/python_pkg/polish_mountain_ranges/__init__.py @@ -0,0 +1 @@ +"""Polish mountain ranges Anki generator.""" diff --git a/python_pkg/polish_mountain_ranges/polish_mountain_ranges_anki.py b/python_pkg/polish_mountain_ranges/polish_mountain_ranges_anki.py new file mode 100644 index 0000000..68d71e5 --- /dev/null +++ b/python_pkg/polish_mountain_ranges/polish_mountain_ranges_anki.py @@ -0,0 +1,332 @@ +"""Anki flashcard generator for Polish mountain ranges. 
+ +Generates Anki-compatible flashcard decks with maps showing mountain ranges +highlighted on a Poland map. +""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +import multiprocessing as mp +from pathlib import Path +import random +import sys +import tempfile +from typing import TYPE_CHECKING + +import genanki +import geopandas as gpd +import matplotlib as mpl + +mpl.use("Agg") # Non-interactive backend for multiprocessing +import matplotlib.pyplot as plt + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from geo_data import get_poland_boundary, get_polish_mountain_ranges + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +RANGE_COLOR = "#7B5A31" # Brown for mountain ranges + + +def create_range_map( + range_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Poland with one mountain range highlighted.""" + fig, ax = plt.subplots(figsize=(10, 12)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Poland as a plain gray shape + poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1) + + # Clip mountain range to Poland boundary + clipped_gdf = range_gdf.copy() + clipped_gdf["geometry"] = range_gdf.geometry.intersection( + poland_boundary.union_all() + ) + + # Plot the mountain range (clipped to Poland) + clipped_gdf.plot(ax=ax, color=RANGE_COLOR, alpha=0.9) + clipped_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=1.5) + + # Set bounds to Poland + bounds = poland_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_range_image_bytes( + range_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> bytes: + """Generate a range map image as bytes.""" + fig = create_range_map(range_gdf, poland_boundary) + + 
buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +# Global variables for multiprocessing (set via initializer) +_mp_poland_boundary: gpd.GeoDataFrame | None = None + + +def _init_worker(poland_geojson: str) -> None: + """Initialize worker process with shared data.""" + global _mp_poland_boundary # noqa: PLW0603 + _mp_poland_boundary = gpd.read_file(poland_geojson) + + +def _render_single_range(args: tuple[str, str]) -> tuple[str, bytes]: + """Render a single range image (worker function). + + Args: + args: Tuple of (range_name, range_geojson_str). + + Returns: + Tuple of (range_name, image_bytes). + """ + range_name, range_geojson = args + range_gdf = gpd.read_file(range_geojson) + + assert _mp_poland_boundary is not None # noqa: S101 + + image_data = generate_range_image_bytes(range_gdf, _mp_poland_boundary) + return range_name, image_data + + +def generate_anki_package( + ranges: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, + deck_name: str = "Polish Mountain Ranges", +) -> genanki.Package: + """Generate Anki package for Polish mountain ranges.""" + model_id_hash = hashlib.md5( # noqa: S324 + f"polish_mountain_ranges_{deck_name}".encode() + ) + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +.info-text { + font-size: 18px; + color: #7F8C8D; + margin-top: 10px; +} +.card.night_mode .info-text { + color: #BDC3C7; +} 
+""" + + my_model = genanki.Model( + model_id, + "Polish Mountain Range Model", + fields=[ + {"name": "RangeMap"}, + {"name": "RangeName"}, + {"name": "Area"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{RangeMap}}
', + "afmt": '
{{RangeMap}}
' + '
' + '
{{RangeName}}
' + '
{{Area}} km²
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + # Prepare data for parallel processing + with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f: + poland_boundary.to_file(f.name, driver="GeoJSON") + poland_geojson_path = f.name + + # Prepare work items: (range_name, range_geojson_str) + work_items: list[tuple[str, str]] = [] + for _, row in ranges.iterrows(): + range_gdf = gpd.GeoDataFrame([row], crs=ranges.crs) + range_geojson = range_gdf.to_json() + work_items.append((row["name"], range_geojson)) + + # Use multiprocessing for parallel rendering + num_workers = min(mp.cpu_count(), 8) + sys.stdout.write( + f"Rendering {len(work_items)} images using {num_workers} workers...\n" + ) + + results: dict[str, bytes] = {} + with mp.Pool( + num_workers, + initializer=_init_worker, + initargs=(poland_geojson_path,), + ) as pool: + for i, (range_name, image_data) in enumerate( + pool.imap_unordered(_render_single_range, work_items) + ): + results[range_name] = image_data + if (i + 1) % 10 == 0: + sys.stdout.write(f" Rendered {i + 1}/{len(work_items)}...\n") + + # Clean up temp file + Path(poland_geojson_path).unlink(missing_ok=True) + + # Create notes from results + for _, row in ranges.iterrows(): + range_name = row["name"] + area_km2 = round(row["area_km2"], 1) if "area_km2" in row else 0 + image_data = results[range_name] + filename = f"range_{range_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', range_name, str(area_km2)], + tags=["geography", "poland", "mountain-ranges", "mountains"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + 
"""Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish mountain ranges.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_mountain_ranges.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish Mountain Ranges", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = ( + Path(args.output) if args.output else Path("polish_mountain_ranges.apkg") + ) + + try: + sys.stdout.write("Loading mountain ranges data...\n") + ranges = get_polish_mountain_ranges() + poland_boundary = get_poland_boundary() + num_ranges = len(ranges) + + sys.stdout.write(f"Found {num_ranges} mountain ranges.\n") + sys.stdout.write("Generating flashcards...\n") + + package = generate_anki_package(ranges, poland_boundary, args.deck_name) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_ranges = list(ranges.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_ranges)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_ranges: + range_name = row["name"] + range_gdf = gpd.GeoDataFrame([row], crs=ranges.crs) + image_data = generate_range_image_bytes(range_gdf, poland_boundary) + safe_name = range_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + 
sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Mountain ranges: {num_ranges}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_mountain_ranges/run.sh b/python_pkg/polish_mountain_ranges/run.sh new file mode 100755 index 0000000..1ff10c0 --- /dev/null +++ b/python_pkg/polish_mountain_ranges/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Run the Polish mountain ranges Anki generator + +cd "$(dirname "$0")" || exit + +python polish_mountain_ranges_anki.py --preview preview_images --preview-count 5 "$@" diff --git a/python_pkg/polish_national_parks/__init__.py b/python_pkg/polish_national_parks/__init__.py new file mode 100644 index 0000000..8094984 --- /dev/null +++ b/python_pkg/polish_national_parks/__init__.py @@ -0,0 +1 @@ +"""Polish national parks Anki deck generator.""" diff --git a/python_pkg/polish_national_parks/polish_national_parks_anki.py b/python_pkg/polish_national_parks/polish_national_parks_anki.py new file mode 100644 index 0000000..a7cd480 --- /dev/null +++ b/python_pkg/polish_national_parks/polish_national_parks_anki.py @@ -0,0 +1,348 @@ +"""Anki flashcard generator for Polish national parks. + +Generates Anki-compatible flashcard decks with maps showing national parks +highlighted on a Poland map. 
+""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +import multiprocessing as mp +from pathlib import Path +import random +import sys +import tempfile +from typing import TYPE_CHECKING + +import genanki +import geopandas as gpd +import matplotlib as mpl + +mpl.use("Agg") # Non-interactive backend for multiprocessing +import matplotlib.pyplot as plt + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from geo_data import get_poland_boundary, get_polish_national_parks + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +PARK_COLOR = "#2ECC71" # Green for national parks + +# Threshold for "small" parks that need an icon (in km²) +SMALL_PARK_THRESHOLD_KM2 = 100 + + +def create_park_map( + park_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Poland with one national park highlighted. + + For small parks, also shows a tree marker at the centroid for visibility. 
+ """ + fig, ax = plt.subplots(figsize=(10, 12)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Poland as a plain gray shape + poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1) + + # Plot the national park + park_gdf.plot(ax=ax, color=PARK_COLOR, alpha=0.9) + park_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=1.5) + + # For small parks, add a tree marker at the centroid + area_km2 = park_gdf.iloc[0].get("area_km2", 0) + if area_km2 < SMALL_PARK_THRESHOLD_KM2: + centroid = park_gdf.iloc[0].geometry.centroid + # Use a tree-like marker (triangle pointing up) + ax.scatter( + [centroid.x], + [centroid.y], + s=600, + c="#006400", + marker="^", + edgecolor="#1A1A1A", + linewidth=2, + zorder=10, + ) + + # Set bounds to Poland + bounds = poland_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_park_image_bytes( + park_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> bytes: + """Generate a park map image as bytes.""" + fig = create_park_map(park_gdf, poland_boundary) + + buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +# Global variables for multiprocessing (set via initializer) +_mp_poland_boundary: gpd.GeoDataFrame | None = None + + +def _init_worker(poland_geojson: str) -> None: + """Initialize worker process with shared data.""" + global _mp_poland_boundary # noqa: PLW0603 + _mp_poland_boundary = gpd.read_file(poland_geojson) + + +def _render_single_park(args: tuple[str, str]) -> tuple[str, bytes]: + """Render a single park image (worker function). + + Args: + args: Tuple of (park_name, park_geojson_str). + + Returns: + Tuple of (park_name, image_bytes). 
+ """ + park_name, park_geojson = args + park_gdf = gpd.read_file(park_geojson) + + assert _mp_poland_boundary is not None # noqa: S101 + + image_data = generate_park_image_bytes(park_gdf, _mp_poland_boundary) + return park_name, image_data + + +def generate_anki_package( + parks: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, + deck_name: str = "Polish National Parks", +) -> genanki.Package: + """Generate Anki package for Polish national parks.""" + model_id_hash = hashlib.md5( # noqa: S324 + f"polish_national_parks_{deck_name}".encode() + ) + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 32px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +.info-text { + font-size: 18px; + color: #7F8C8D; + margin-top: 10px; +} +.card.night_mode .info-text { + color: #BDC3C7; +} +""" + + my_model = genanki.Model( + model_id, + "Polish National Park Model", + fields=[ + {"name": "ParkMap"}, + {"name": "ParkName"}, + {"name": "Area"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{ParkMap}}
', + "afmt": '
{{ParkMap}}
' + '
' + '
{{ParkName}}
' + '
{{Area}} km²
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + # Prepare data for parallel processing + with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f: + poland_boundary.to_file(f.name, driver="GeoJSON") + poland_geojson_path = f.name + + # Prepare work items: (park_name, park_geojson_str) + work_items: list[tuple[str, str]] = [] + for _, row in parks.iterrows(): + park_gdf = gpd.GeoDataFrame([row], crs=parks.crs) + park_geojson = park_gdf.to_json() + work_items.append((row["name"], park_geojson)) + + # Use multiprocessing for parallel rendering + num_workers = min(mp.cpu_count(), 8) + sys.stdout.write( + f"Rendering {len(work_items)} images using {num_workers} workers...\n" + ) + + results: dict[str, bytes] = {} + with mp.Pool( + num_workers, + initializer=_init_worker, + initargs=(poland_geojson_path,), + ) as pool: + for i, (park_name, image_data) in enumerate( + pool.imap_unordered(_render_single_park, work_items) + ): + results[park_name] = image_data + if (i + 1) % 10 == 0: + sys.stdout.write(f" Rendered {i + 1}/{len(work_items)}...\n") + + # Clean up temp file + Path(poland_geojson_path).unlink(missing_ok=True) + + # Create notes from results + for _, row in parks.iterrows(): + park_name = row["name"] + area_km2 = round(row["area_km2"], 1) if "area_km2" in row else 0 + image_data = results[park_name] + filename = f"park_{park_name.replace(' ', '_').replace('/', '_')}.png" + + note = genanki.Note( + model=my_model, + fields=[f'', park_name, str(area_km2)], + tags=["geography", "poland", "national-parks", "nature"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry 
point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish national parks.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_national_parks.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish National Parks", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + + args = parser.parse_args(argv) + output_path = ( + Path(args.output) if args.output else Path("polish_national_parks.apkg") + ) + + try: + sys.stdout.write("Loading national parks data...\n") + parks = get_polish_national_parks() + poland_boundary = get_poland_boundary() + num_parks = len(parks) + + sys.stdout.write(f"Found {num_parks} national parks.\n") + sys.stdout.write("Generating flashcards...\n") + + package = generate_anki_package(parks, poland_boundary, args.deck_name) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_parks = list(parks.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_parks)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_parks: + park_name = row["name"] + park_gdf = gpd.GeoDataFrame([row], crs=parks.crs) + image_data = generate_park_image_bytes(park_gdf, poland_boundary) + safe_name = park_name.replace(" ", "_").replace("/", "_") + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + 
sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"National parks: {num_parks}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_national_parks/run.sh b/python_pkg/polish_national_parks/run.sh new file mode 100755 index 0000000..c31f77b --- /dev/null +++ b/python_pkg/polish_national_parks/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Run the Polish national parks Anki generator + +cd "$(dirname "$0")" || exit + +python polish_national_parks_anki.py --preview preview_images --preview-count 5 "$@" diff --git a/python_pkg/polish_nature_reserves/__init__.py b/python_pkg/polish_nature_reserves/__init__.py new file mode 100644 index 0000000..24de2f5 --- /dev/null +++ b/python_pkg/polish_nature_reserves/__init__.py @@ -0,0 +1 @@ +"""Polish nature reserves Anki generator.""" diff --git a/python_pkg/polish_nature_reserves/polish_nature_reserves_anki.py b/python_pkg/polish_nature_reserves/polish_nature_reserves_anki.py new file mode 100644 index 0000000..6ce086e --- /dev/null +++ b/python_pkg/polish_nature_reserves/polish_nature_reserves_anki.py @@ -0,0 +1,345 @@ +"""Anki flashcard generator for Polish nature reserves. + +Generates Anki-compatible flashcard decks with maps showing nature reserves +highlighted on a Poland map. Optimized for large datasets (~1500 reserves). 
+""" + +from __future__ import annotations + +import argparse +import hashlib +from io import BytesIO +import multiprocessing as mp +from pathlib import Path +import random +import sys +import tempfile +from typing import TYPE_CHECKING + +import genanki +import geopandas as gpd +import matplotlib as mpl + +mpl.use("Agg") # Non-interactive backend for multiprocessing +import matplotlib.pyplot as plt + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from geo_data import get_poland_boundary, get_polish_nature_reserves + +if TYPE_CHECKING: + from collections.abc import Sequence + + from matplotlib.figure import Figure + +RESERVE_COLOR = "#16A085" # Teal for nature reserves + + +def create_reserve_map( + reserve_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> Figure: + """Create a map showing Poland with one nature reserve highlighted.""" + fig, ax = plt.subplots(figsize=(10, 12)) + ax.set_aspect("equal") + ax.axis("off") + fig.patch.set_alpha(0) + ax.patch.set_alpha(0) + + # Plot Poland as a plain gray shape + poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6) + poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1) + + # Plot the nature reserve + reserve_gdf.plot(ax=ax, color=RESERVE_COLOR, alpha=0.9) + reserve_gdf.boundary.plot(ax=ax, color="#1A1A1A", linewidth=3) + + # Set bounds to Poland + bounds = poland_boundary.total_bounds + ax.set_xlim(bounds[0], bounds[2]) + ax.set_ylim(bounds[1], bounds[3]) + + return fig + + +def generate_reserve_image_bytes( + reserve_gdf: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, +) -> bytes: + """Generate a reserve map image as bytes.""" + fig = create_reserve_map(reserve_gdf, poland_boundary) + + buf = BytesIO() + fig.savefig(buf, format="png", bbox_inches="tight", dpi=150) + plt.close(fig) + buf.seek(0) + + return buf.read() + + +# Global variables for multiprocessing (set via initializer) +_mp_poland_boundary: gpd.GeoDataFrame | None = None + + +def _init_worker(poland_geojson: 
str) -> None: + """Initialize worker process with shared data.""" + global _mp_poland_boundary # noqa: PLW0603 + _mp_poland_boundary = gpd.read_file(poland_geojson) + + +def _render_single_reserve(args: tuple[str, str]) -> tuple[str, bytes]: + """Render a single reserve image (worker function). + + Args: + args: Tuple of (reserve_name, reserve_geojson_str). + + Returns: + Tuple of (reserve_name, image_bytes). + """ + reserve_name, reserve_geojson = args + reserve_gdf = gpd.read_file(reserve_geojson) + + assert _mp_poland_boundary is not None # noqa: S101 + + image_data = generate_reserve_image_bytes(reserve_gdf, _mp_poland_boundary) + return reserve_name, image_data + + +def generate_anki_package( + reserves: gpd.GeoDataFrame, + poland_boundary: gpd.GeoDataFrame, + deck_name: str = "Polish Nature Reserves", +) -> genanki.Package: + """Generate Anki package for Polish nature reserves.""" + model_id_hash = hashlib.md5( # noqa: S324 + f"polish_nature_reserves_{deck_name}".encode() + ) + model_id = int(model_id_hash.hexdigest()[:8], 16) + + card_css = """ +.card { + font-family: Arial, sans-serif; + font-size: 24px; + text-align: center; + color: #333; + background-color: #fff; +} +.card.night_mode { + color: #eee; + background-color: #2f2f2f; +} +.map-container { + display: flex; + justify-content: center; + align-items: center; + min-height: 80vh; +} +.map-container img { + max-width: 100%; + max-height: 80vh; + object-fit: contain; +} +.answer-text { + font-size: 28px; + font-weight: bold; + margin-top: 20px; + color: #2C3E50; +} +.card.night_mode .answer-text { + color: #ECF0F1; +} +.info-text { + font-size: 18px; + color: #7F8C8D; + margin-top: 10px; +} +.card.night_mode .info-text { + color: #BDC3C7; +} +""" + + my_model = genanki.Model( + model_id, + "Polish Nature Reserve Model", + fields=[ + {"name": "ReserveMap"}, + {"name": "ReserveName"}, + {"name": "Area"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": '
{{ReserveMap}}
', + "afmt": '
{{ReserveMap}}
' + '
' + '
{{ReserveName}}
' + '
{{Area}} km²
', + }, + ], + css=card_css, + ) + + deck_id = random.randrange(1 << 30, 1 << 31) # noqa: S311 + my_deck = genanki.Deck(deck_id, deck_name) + media_files = [] + + # Prepare data for parallel processing + with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as f: + poland_boundary.to_file(f.name, driver="GeoJSON") + poland_geojson_path = f.name + + # Prepare work items: (reserve_name, reserve_geojson_str) + work_items: list[tuple[str, str]] = [] + for _, row in reserves.iterrows(): + reserve_gdf = gpd.GeoDataFrame([row], crs=reserves.crs) + reserve_geojson = reserve_gdf.to_json() + work_items.append((row["name"], reserve_geojson)) + + # Use multiprocessing for parallel rendering (more workers for large datasets) + num_workers = min(mp.cpu_count(), 8) + sys.stdout.write( + f"Rendering {len(work_items)} images using {num_workers} workers...\n" + ) + sys.stdout.write("(This may take a while due to the large number of reserves)\n") + + results: dict[str, bytes] = {} + with mp.Pool( + num_workers, + initializer=_init_worker, + initargs=(poland_geojson_path,), + ) as pool: + for i, (reserve_name, image_data) in enumerate( + pool.imap_unordered(_render_single_reserve, work_items) + ): + results[reserve_name] = image_data + if (i + 1) % 100 == 0: + sys.stdout.write(f" Rendered {i + 1}/{len(work_items)}...\n") + + sys.stdout.write(f" Rendered {len(work_items)}/{len(work_items)}.\n") + + # Clean up temp file + Path(poland_geojson_path).unlink(missing_ok=True) + + # Create notes from results + for _, row in reserves.iterrows(): + reserve_name = row["name"] + area_km2 = round(row["area_km2"], 2) if "area_km2" in row else 0 + image_data = results[reserve_name] + # Use hash for unique filename since names may have special chars + name_hash = hashlib.md5(reserve_name.encode()).hexdigest()[:8] # noqa: S324 + safe_name = reserve_name.replace(" ", "_").replace("/", "_")[:30] + filename = f"reserve_{safe_name}_{name_hash}.png" + + note = genanki.Note( + model=my_model, + 
fields=[f'', reserve_name, str(area_km2)], + tags=["geography", "poland", "nature-reserves", "protected-areas"], + ) + my_deck.add_note(note) + + temp_path = Path(f"/tmp/{filename}") # noqa: S108 + temp_path.write_bytes(image_data) + media_files.append(str(temp_path)) + + package = genanki.Package(my_deck) + package.media_files = media_files + return package + + +def main(argv: Sequence[str] | None = None) -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate Anki flashcards for Polish nature reserves.", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default=None, + help="Output file path (default: polish_nature_reserves.apkg)", + ) + parser.add_argument( + "--deck-name", + "-d", + type=str, + default="Polish Nature Reserves", + help="Name for the Anki deck", + ) + parser.add_argument( + "--preview", + "-p", + type=str, + default=None, + help="Export preview images to specified directory", + ) + parser.add_argument( + "--preview-count", + type=int, + default=5, + help="Number of preview images to export (default: 5)", + ) + parser.add_argument( + "--limit", + "-l", + type=int, + default=None, + help="Limit number of reserves (for testing, default: all)", + ) + + args = parser.parse_args(argv) + output_path = ( + Path(args.output) if args.output else Path("polish_nature_reserves.apkg") + ) + + try: + sys.stdout.write("Loading nature reserves data...\n") + reserves = get_polish_nature_reserves() + poland_boundary = get_poland_boundary() + + # Apply limit if specified + if args.limit: + reserves = reserves.head(args.limit) + sys.stdout.write(f"Limiting to {args.limit} reserves.\n") + + num_reserves = len(reserves) + + sys.stdout.write(f"Found {num_reserves} nature reserves.\n") + sys.stdout.write("Generating flashcards...\n") + + package = generate_anki_package(reserves, poland_boundary, args.deck_name) + package.write_to_file(str(output_path)) + + # Export preview images if requested + if args.preview: + 
preview_dir = Path(args.preview) + preview_dir.mkdir(parents=True, exist_ok=True) + preview_reserves = list(reserves.iterrows())[: args.preview_count] + sys.stdout.write( + f"Exporting {len(preview_reserves)} preview images " + f"to {preview_dir}...\n" + ) + for _, row in preview_reserves: + reserve_name = row["name"] + reserve_gdf = gpd.GeoDataFrame([row], crs=reserves.crs) + image_data = generate_reserve_image_bytes(reserve_gdf, poland_boundary) + safe_name = reserve_name.replace(" ", "_").replace("/", "_")[:30] + preview_path = preview_dir / f"{safe_name}.png" + preview_path.write_bytes(image_data) + sys.stdout.write(f" Saved: {preview_path.name}\n") + + sys.stdout.write("\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write("FLASHCARD GENERATION COMPLETE\n") + sys.stdout.write("=" * 60 + "\n") + sys.stdout.write(f"Nature reserves: {num_reserves}\n") + sys.stdout.write(f"Output file: {output_path.absolute()}\n") + if args.preview: + sys.stdout.write(f"Preview images: {args.preview}\n") + except (OSError, ValueError, RuntimeError) as e: + sys.stderr.write(f"Error: {e}\n") + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python_pkg/polish_nature_reserves/run.sh b/python_pkg/polish_nature_reserves/run.sh new file mode 100755 index 0000000..de2185b --- /dev/null +++ b/python_pkg/polish_nature_reserves/run.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Run the Polish nature reserves Anki generator + +cd "$(dirname "$0")" || exit + +# Default runs all reserves - use --limit for testing +python polish_nature_reserves_anki.py --preview preview_images --preview-count 5 "$@" diff --git a/python_pkg/polish_rivers/__init__.py b/python_pkg/polish_rivers/__init__.py new file mode 100644 index 0000000..e9098ab --- /dev/null +++ b/python_pkg/polish_rivers/__init__.py @@ -0,0 +1 @@ +"""Polish rivers Anki deck generator.""" diff --git a/python_pkg/polish_rivers/polish_rivers_anki.py b/python_pkg/polish_rivers/polish_rivers_anki.py new file 
def create_river_map(
    river_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> Figure:
    """Create a map showing Poland with one river highlighted.

    When the river's bounding box reaches outside Poland's bounding box, the
    view is widened to the union of both boxes (plus padding) and that area is
    shaded as background; otherwise the view is clipped to Poland's bounds.

    Args:
        river_gdf: GeoDataFrame holding the river geometry to highlight.
        poland_boundary: GeoDataFrame holding the outline of Poland.

    Returns:
        The matplotlib figure; the caller is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 12))
    ax.set_aspect("equal")
    ax.axis("off")
    # Transparent figure and axes backgrounds.
    fig.patch.set_alpha(0)
    ax.patch.set_alpha(0)

    pl_min_x, pl_min_y, pl_max_x, pl_max_y = poland_boundary.total_bounds
    rv_min_x, rv_min_y, rv_max_x, rv_max_y = river_gdf.total_bounds

    extends_beyond = (
        rv_min_x < pl_min_x
        or rv_min_y < pl_min_y
        or rv_max_x > pl_max_x
        or rv_max_y > pl_max_y
    )

    if extends_beyond:
        # Padding is in the data's coordinate units
        # (presumably degrees -- TODO confirm the CRS used by the caller).
        padding = 0.2
        view_min_x = min(pl_min_x, rv_min_x) - padding
        view_min_y = min(pl_min_y, rv_min_y) - padding
        view_max_x = max(pl_max_x, rv_max_x) + padding
        view_max_y = max(pl_max_y, rv_max_y) + padding

        # Shade the whole extended view so the area outside Poland
        # (neighboring countries) is not left blank.
        ax.fill(
            [view_min_x, view_max_x, view_max_x, view_min_x, view_min_x],
            [view_min_y, view_min_y, view_max_y, view_max_y, view_min_y],
            color=NEIGHBOR_COLOR,
            alpha=0.3,
        )
    else:
        view_min_x, view_min_y = pl_min_x, pl_min_y
        view_max_x, view_max_y = pl_max_x, pl_max_y

    # Poland as a plain gray shape with a dark outline.
    poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
    poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1)

    # The highlighted river on top.
    river_gdf.plot(ax=ax, color=RIVER_COLOR, linewidth=3, alpha=0.9)

    # Limits are set last so the plot calls above cannot rescale the view.
    ax.set_xlim(view_min_x, view_max_x)
    ax.set_ylim(view_min_y, view_max_y)

    return fig


def generate_river_image_bytes(
    river_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> bytes:
    """Render a river map and return it as PNG bytes.

    Args:
        river_gdf: GeoDataFrame holding the river geometry to highlight.
        poland_boundary: GeoDataFrame holding the outline of Poland.

    Returns:
        The PNG-encoded image.
    """
    fig = create_river_map(river_gdf, poland_boundary)
    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Always release the figure: pyplot keeps every open figure alive,
        # so a failed save would otherwise leak it in a long-lived worker.
        plt.close(fig)

    return buf.getvalue()
# Poland boundary shared by all tasks in a pool worker (set via initializer).
_mp_poland_boundary: gpd.GeoDataFrame | None = None


def _init_worker(poland_geojson: str) -> None:
    """Initialize a worker process with the shared Poland boundary.

    Args:
        poland_geojson: Path of a GeoJSON file containing the boundary.
    """
    global _mp_poland_boundary  # noqa: PLW0603
    _mp_poland_boundary = gpd.read_file(poland_geojson)


def _render_single_river(args: tuple[str, str]) -> tuple[str, bytes]:
    """Render a single river image (worker function).

    Args:
        args: Tuple of (river_name, river_geojson_str).

    Returns:
        Tuple of (river_name, image_bytes).

    Raises:
        RuntimeError: If the worker was started without ``_init_worker``.
    """
    river_name, river_geojson = args

    # Explicit check instead of ``assert`` so it survives ``python -O`` and
    # is reported through main()'s regular error handling.
    if _mp_poland_boundary is None:
        msg = "Worker not initialized: Poland boundary is missing"
        raise RuntimeError(msg)

    river_gdf = gpd.read_file(river_geojson)
    image_data = generate_river_image_bytes(river_gdf, _mp_poland_boundary)
    return river_name, image_data


def generate_anki_package(
    rivers: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish Rivers",
) -> genanki.Package:
    """Generate Anki package for Polish rivers.

    One note is created per row of ``rivers``. Images are rendered in a
    process pool and matched to rows by position, so rivers that share a name
    still get their own image and their own media file.

    Args:
        rivers: One row per river; reads the ``name`` column, the geometry
            and, when present, ``length_km``.
        poland_boundary: Outline of Poland drawn on every card.
        deck_name: Name of the generated deck (also seeds the model id).

    Returns:
        A package whose ``media_files`` point at PNG files in a private
        temporary directory. The files must exist until the package has been
        written; the directory is removed at interpreter exit.
    """
    # Function-scope imports: only the media-directory cleanup needs them.
    import atexit
    import shutil

    # Deterministic model id so re-generated decks reuse the same note type.
    model_id_hash = hashlib.md5(f"polish_rivers_{deck_name}".encode())  # noqa: S324
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 32px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
"""

    # NOTE(review): the wrapper markup below is reconstructed from the CSS
    # class names above -- confirm against the intended card layout.
    my_model = genanki.Model(
        model_id,
        "Polish River Model",
        fields=[
            {"name": "RiverMap"},
            {"name": "RiverName"},
            {"name": "Length"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{RiverMap}}</div>',
                "afmt": '<div class="map-container">{{RiverMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{RiverName}}</div>'
                '<div class="info-text">~{{Length}} km w Polsce</div>',
            },
        ],
        css=card_css,
    )

    # NOTE(review): a random deck id means every run produces a new deck on
    # import; consider deriving it from deck_name like the model id.
    deck_id = random.randrange(1 << 30, 1 << 31)  # noqa: S311
    my_deck = genanki.Deck(deck_id, deck_name)

    # Work items: (river_name, river_geojson_str), in row order.
    work_items: list[tuple[str, str]] = [
        (row["name"], gpd.GeoDataFrame([row], crs=rivers.crs).to_json())
        for _, row in rivers.iterrows()
    ]

    num_workers = min(mp.cpu_count(), 8)
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    # Reserve a file name, then close the handle before writing to it so the
    # GeoJSON writer does not compete with our own open descriptor.
    with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as handle:
        poland_geojson_path = handle.name

    images: list[bytes] = []
    try:
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path,),
        ) as pool:
            # imap (ordered) keeps results aligned with work_items, so rows
            # are matched by position rather than by a possibly duplicated
            # river name.
            for done, (_, image_data) in enumerate(
                pool.imap(_render_single_river, work_items), start=1
            ):
                images.append(image_data)
                if done % 50 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")
    finally:
        # Remove the shared boundary file even when rendering fails.
        Path(poland_geojson_path).unlink(missing_ok=True)

    # Private media directory instead of fixed /tmp/<name> paths; genanki
    # reads the files only when the package is written, so clean up at exit.
    media_dir = Path(tempfile.mkdtemp(prefix="polish_rivers_media_"))
    atexit.register(shutil.rmtree, media_dir, ignore_errors=True)

    media_files: list[str] = []
    used_filenames: set[str] = set()
    for (_, row), image_data in zip(rivers.iterrows(), images, strict=True):
        river_name = row["name"]

        # Missing / NaN / infinite lengths are shown as 0 instead of crashing.
        try:
            length_km = round(float(row.get("length_km", 0)))
        except (TypeError, ValueError, OverflowError):
            length_km = 0

        # The first river with a given name keeps the plain file name;
        # later ones get a numeric suffix so media files never collide.
        stem = f"river_{river_name.replace(' ', '_').replace('/', '_')}"
        filename = f"{stem}.png"
        suffix = 1
        while filename in used_filenames:
            suffix += 1
            filename = f"{stem}_{suffix}.png"
        used_filenames.add(filename)

        note = genanki.Note(
            model=my_model,
            # Anki resolves media by bare file name inside the field HTML.
            fields=[f'<img src="{filename}">', river_name, str(length_km)],
            tags=["geography", "poland", "rivers", "water"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point for the Polish rivers deck generator.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        0 on success, 1 when an OSError, ValueError or RuntimeError occurred.
    """
    parser = argparse.ArgumentParser(
        description="Generate Anki flashcards for Polish rivers.",
    )
    # Declarative option table: (flags, add_argument keyword arguments).
    option_table = (
        (
            ("--output", "-o"),
            {
                "type": str,
                "default": None,
                "help": "Output file path (default: polish_rivers.apkg)",
            },
        ),
        (
            ("--deck-name", "-d"),
            {
                "type": str,
                "default": "Polish Rivers",
                "help": "Name for the Anki deck",
            },
        ),
        (
            ("--preview", "-p"),
            {
                "type": str,
                "default": None,
                "help": "Export preview images to specified directory",
            },
        ),
        (
            ("--preview-count",),
            {
                "type": int,
                "default": 5,
                "help": "Number of preview images to export (default: 5)",
            },
        ),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args(argv)
    output_path = Path(args.output or "polish_rivers.apkg")

    try:
        sys.stdout.write("Loading rivers data...\n")
        rivers = get_polish_rivers()
        poland_boundary = get_poland_boundary()
        num_rivers = len(rivers)

        sys.stdout.write(f"Found {num_rivers} rivers.\n")
        sys.stdout.write("Generating flashcards...\n")

        deck_package = generate_anki_package(rivers, poland_boundary, args.deck_name)
        deck_package.write_to_file(str(output_path))

        # Optional preview export of the first few rivers.
        if args.preview:
            preview_dir = Path(args.preview)
            preview_dir.mkdir(parents=True, exist_ok=True)
            preview_rivers = list(rivers.iterrows())[: args.preview_count]
            sys.stdout.write(
                f"Exporting {len(preview_rivers)} preview images "
                f"to {preview_dir}...\n"
            )
            for _, record in preview_rivers:
                single_river = gpd.GeoDataFrame([record], crs=rivers.crs)
                png_bytes = generate_river_image_bytes(single_river, poland_boundary)
                safe_name = record["name"].replace(" ", "_").replace("/", "_")
                preview_path = preview_dir / f"{safe_name}.png"
                preview_path.write_bytes(png_bytes)
                sys.stdout.write(f" Saved: {preview_path.name}\n")

        # Final summary banner.
        rule = "=" * 60 + "\n"
        summary = [
            "\n",
            rule,
            "FLASHCARD GENERATION COMPLETE\n",
            rule,
            f"Rivers: {num_rivers}\n",
            f"Output file: {output_path.absolute()}\n",
        ]
        if args.preview:
            summary.append(f"Preview images: {args.preview}\n")
        sys.stdout.write("".join(summary))
    except (OSError, ValueError, RuntimeError) as e:
        sys.stderr.write(f"Error: {e}\n")
        return 1
    else:
        return 0


if __name__ == "__main__":
    sys.exit(main())
def create_unesco_map(
    site_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> Figure:
    """Create a map showing Poland with one UNESCO site highlighted.

    The site is always drawn as a star marker: at the point itself for Point
    geometries, at the centroid for any other geometry. Only the first row of
    ``site_gdf`` is used.

    Args:
        site_gdf: GeoDataFrame whose first row holds the site geometry.
        poland_boundary: GeoDataFrame holding the outline of Poland.

    Returns:
        The matplotlib figure; the caller is responsible for closing it.
    """
    fig, ax = plt.subplots(figsize=(10, 12))
    ax.set_aspect("equal")
    ax.axis("off")
    # Transparent figure and axes backgrounds.
    fig.patch.set_alpha(0)
    ax.patch.set_alpha(0)

    # Poland as a plain gray shape with a dark outline.
    poland_boundary.plot(ax=ax, color="#D5D8DC", alpha=0.6)
    poland_boundary.boundary.plot(ax=ax, color="#2C3E50", linewidth=1)

    # Marker position: the point itself, or the centroid of larger shapes.
    geom = site_gdf.iloc[0].geometry
    anchor = geom if isinstance(geom, Point) else geom.centroid

    # High zorder keeps the star above the boundary layers.
    ax.scatter(
        [anchor.x],
        [anchor.y],
        s=800,
        c=SITE_COLOR_POINT,
        marker="*",
        edgecolor="#1A1A1A",
        linewidth=2,
        zorder=10,
    )

    # The view is always clipped to Poland's bounding box.
    min_x, min_y, max_x, max_y = poland_boundary.total_bounds
    ax.set_xlim(min_x, max_x)
    ax.set_ylim(min_y, max_y)

    return fig


def generate_unesco_image_bytes(
    site_gdf: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
) -> bytes:
    """Render a UNESCO site map and return it as PNG bytes.

    Args:
        site_gdf: GeoDataFrame whose first row holds the site geometry.
        poland_boundary: GeoDataFrame holding the outline of Poland.

    Returns:
        The PNG-encoded image.
    """
    fig = create_unesco_map(site_gdf, poland_boundary)
    buf = BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=150)
    finally:
        # Always release the figure: pyplot keeps every open figure alive,
        # so a failed save would otherwise leak it in a long-lived worker.
        plt.close(fig)

    return buf.getvalue()


# Poland boundary shared by all tasks in a pool worker (set via initializer).
_mp_poland_boundary: gpd.GeoDataFrame | None = None


def _init_worker(poland_geojson: str) -> None:
    """Initialize a worker process with the shared Poland boundary.

    Args:
        poland_geojson: Path of a GeoJSON file containing the boundary.
    """
    global _mp_poland_boundary  # noqa: PLW0603
    _mp_poland_boundary = gpd.read_file(poland_geojson)
def _render_single_site(args: tuple[str, str]) -> tuple[str, bytes]:
    """Render a single site image (worker function).

    Args:
        args: Tuple of (site_name, site_geojson_str).

    Returns:
        Tuple of (site_name, image_bytes).

    Raises:
        RuntimeError: If the worker was started without ``_init_worker``.
    """
    site_name, site_geojson = args

    # Explicit check instead of ``assert`` so it survives ``python -O`` and
    # is reported through main()'s regular error handling.
    if _mp_poland_boundary is None:
        msg = "Worker not initialized: Poland boundary is missing"
        raise RuntimeError(msg)

    site_gdf = gpd.read_file(site_geojson)
    image_data = generate_unesco_image_bytes(site_gdf, _mp_poland_boundary)
    return site_name, image_data


def generate_anki_package(
    sites: gpd.GeoDataFrame,
    poland_boundary: gpd.GeoDataFrame,
    deck_name: str = "Polish UNESCO World Heritage Sites",
) -> genanki.Package:
    """Generate Anki package for Polish UNESCO sites.

    One note is created per row of ``sites``. Images are rendered in a
    process pool and matched to rows by position, so sites that share a name
    still get their own image and their own media file.

    Args:
        sites: One row per site; reads the ``name`` column, the geometry and,
            when present, ``inscribed_year`` and ``category``.
        poland_boundary: Outline of Poland drawn on every card.
        deck_name: Name of the generated deck (also seeds the model id).

    Returns:
        A package whose ``media_files`` point at PNG files in a private
        temporary directory. The files must exist until the package has been
        written; the directory is removed at interpreter exit.
    """
    # Function-scope imports: only the media-directory cleanup needs them.
    import atexit
    import shutil

    def _as_text(value: object, default: str) -> str:
        """Return ``value`` as field text, or ``default`` when it is missing."""
        if value is None:
            return default
        if isinstance(value, float):
            if value != value:  # NaN is the only float unequal to itself
                return default
            if value.is_integer():
                # A year stored as 1978.0 should read "1978".
                return str(int(value))
        return str(value)

    # Deterministic model id so re-generated decks reuse the same note type.
    model_id_hash = hashlib.md5(  # noqa: S324
        f"polish_unesco_sites_{deck_name}".encode()
    )
    model_id = int(model_id_hash.hexdigest()[:8], 16)

    card_css = """
.card {
    font-family: Arial, sans-serif;
    font-size: 24px;
    text-align: center;
    color: #333;
    background-color: #fff;
}
.card.night_mode {
    color: #eee;
    background-color: #2f2f2f;
}
.map-container {
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 80vh;
}
.map-container img {
    max-width: 100%;
    max-height: 80vh;
    object-fit: contain;
}
.answer-text {
    font-size: 28px;
    font-weight: bold;
    margin-top: 20px;
    color: #2C3E50;
}
.card.night_mode .answer-text {
    color: #ECF0F1;
}
.info-text {
    font-size: 18px;
    color: #7F8C8D;
    margin-top: 10px;
}
.card.night_mode .info-text {
    color: #BDC3C7;
}
.year-badge {
    display: inline-block;
    background: #9B59B6;
    color: white;
    padding: 4px 12px;
    border-radius: 15px;
    font-size: 16px;
    margin-top: 8px;
}
.card.night_mode .year-badge {
    background: #8E44AD;
}
"""

    # NOTE(review): the wrapper markup below is reconstructed from the CSS
    # class names above -- confirm against the intended card layout.
    my_model = genanki.Model(
        model_id,
        "Polish UNESCO Site Model",
        fields=[
            {"name": "SiteMap"},
            {"name": "SiteName"},
            {"name": "InscribedYear"},
            {"name": "Category"},
        ],
        templates=[
            {
                "name": "Card 1",
                "qfmt": '<div class="map-container">{{SiteMap}}</div>',
                "afmt": '<div class="map-container">{{SiteMap}}</div>'
                '<hr id="answer">'
                '<div class="answer-text">{{SiteName}}</div>'
                '<div class="info-text">{{Category}}</div>'
                '<div class="year-badge">Inscribed: {{InscribedYear}}</div>',
            },
        ],
        css=card_css,
    )

    # NOTE(review): a random deck id means every run produces a new deck on
    # import; consider deriving it from deck_name like the model id.
    deck_id = random.randrange(1 << 30, 1 << 31)  # noqa: S311
    my_deck = genanki.Deck(deck_id, deck_name)

    # Work items: (site_name, site_geojson_str), in row order.
    work_items: list[tuple[str, str]] = [
        (row["name"], gpd.GeoDataFrame([row], crs=sites.crs).to_json())
        for _, row in sites.iterrows()
    ]

    num_workers = min(mp.cpu_count(), 8)
    sys.stdout.write(
        f"Rendering {len(work_items)} images using {num_workers} workers...\n"
    )

    # Reserve a file name, then close the handle before writing to it so the
    # GeoJSON writer does not compete with our own open descriptor.
    with tempfile.NamedTemporaryFile(suffix=".geojson", delete=False) as handle:
        poland_geojson_path = handle.name

    images: list[bytes] = []
    try:
        poland_boundary.to_file(poland_geojson_path, driver="GeoJSON")
        with mp.Pool(
            num_workers,
            initializer=_init_worker,
            initargs=(poland_geojson_path,),
        ) as pool:
            # imap (ordered) keeps results aligned with work_items, so rows
            # are matched by position rather than by a possibly duplicated
            # site name.
            for done, (_, image_data) in enumerate(
                pool.imap(_render_single_site, work_items), start=1
            ):
                images.append(image_data)
                if done % 5 == 0:
                    sys.stdout.write(f" Rendered {done}/{len(work_items)}...\n")
    finally:
        # Remove the shared boundary file even when rendering fails.
        Path(poland_geojson_path).unlink(missing_ok=True)

    # Private media directory instead of fixed /tmp/<name> paths; genanki
    # reads the files only when the package is written, so clean up at exit.
    media_dir = Path(tempfile.mkdtemp(prefix="polish_unesco_media_"))
    atexit.register(shutil.rmtree, media_dir, ignore_errors=True)

    media_files: list[str] = []
    used_filenames: set[str] = set()
    for (_, row), image_data in zip(sites.iterrows(), images, strict=True):
        site_name = row["name"]
        # Note fields must be strings; missing values fall back to defaults.
        inscribed_year = _as_text(row.get("inscribed_year", "Unknown"), "Unknown")
        category = _as_text(
            row.get("category", "Cultural/Natural"), "Cultural/Natural"
        )

        # The first site with a given name keeps the plain file name;
        # later ones get a numeric suffix so media files never collide.
        stem = f"unesco_{site_name.replace(' ', '_').replace('/', '_')}"
        filename = f"{stem}.png"
        suffix = 1
        while filename in used_filenames:
            suffix += 1
            filename = f"{stem}_{suffix}.png"
        used_filenames.add(filename)

        note = genanki.Note(
            model=my_model,
            fields=[
                # Anki resolves media by bare file name inside the field HTML.
                f'<img src="{filename}">',
                site_name,
                inscribed_year,
                category,
            ],
            tags=["geography", "poland", "unesco", "heritage"],
        )
        my_deck.add_note(note)

        media_path = media_dir / filename
        media_path.write_bytes(image_data)
        media_files.append(str(media_path))

    package = genanki.Package(my_deck)
    package.media_files = media_files
    return package
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point for the Polish UNESCO sites deck generator.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        0 on success, 1 when an OSError, ValueError or RuntimeError occurred.
    """
    parser = argparse.ArgumentParser(
        description="Generate Anki flashcards for Polish UNESCO sites.",
    )
    # Declarative option table: (flags, add_argument keyword arguments).
    option_table = (
        (
            ("--output", "-o"),
            {
                "type": str,
                "default": None,
                "help": "Output file path (default: polish_unesco_sites.apkg)",
            },
        ),
        (
            ("--deck-name", "-d"),
            {
                "type": str,
                "default": "Polish UNESCO World Heritage Sites",
                "help": "Name for the Anki deck",
            },
        ),
        (
            ("--preview", "-p"),
            {
                "type": str,
                "default": None,
                "help": "Export preview images to specified directory",
            },
        ),
        (
            ("--preview-count",),
            {
                "type": int,
                "default": 5,
                "help": "Number of preview images to export (default: 5)",
            },
        ),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args(argv)
    output_path = Path(args.output or "polish_unesco_sites.apkg")

    try:
        sys.stdout.write("Loading UNESCO sites data...\n")
        sites = get_polish_unesco_sites()
        poland_boundary = get_poland_boundary()
        num_sites = len(sites)

        sys.stdout.write(f"Found {num_sites} UNESCO World Heritage Sites.\n")
        sys.stdout.write("Generating flashcards...\n")

        deck_package = generate_anki_package(sites, poland_boundary, args.deck_name)
        deck_package.write_to_file(str(output_path))

        # Optional preview export of the first few sites.
        if args.preview:
            preview_dir = Path(args.preview)
            preview_dir.mkdir(parents=True, exist_ok=True)
            preview_sites = list(sites.iterrows())[: args.preview_count]
            sys.stdout.write(
                f"Exporting {len(preview_sites)} preview images "
                f"to {preview_dir}...\n"
            )
            for _, record in preview_sites:
                single_site = gpd.GeoDataFrame([record], crs=sites.crs)
                png_bytes = generate_unesco_image_bytes(single_site, poland_boundary)
                safe_name = record["name"].replace(" ", "_").replace("/", "_")
                preview_path = preview_dir / f"{safe_name}.png"
                preview_path.write_bytes(png_bytes)
                sys.stdout.write(f" Saved: {preview_path.name}\n")

        # Final summary banner.
        rule = "=" * 60 + "\n"
        summary = [
            "\n",
            rule,
            "FLASHCARD GENERATION COMPLETE\n",
            rule,
            f"UNESCO sites: {num_sites}\n",
            f"Output file: {output_path.absolute()}\n",
        ]
        if args.preview:
            summary.append(f"Preview images: {args.preview}\n")
        sys.stdout.write("".join(summary))
    except (OSError, ValueError, RuntimeError) as e:
        sys.stderr.write(f"Error: {e}\n")
        return 1
    else:
        return 0


if __name__ == "__main__":
    sys.exit(main())