testsAndMisc/python_pkg/geo_data/_poland_nature.py

447 lines
14 KiB
Python
Raw Normal View History

"""Polish natural land features.
Functions for downloading and caching data about Polish mountains,
national parks, forests, nature reserves, and landscape parks.
"""
from __future__ import annotations
import contextlib
import json
import sys
from typing import TYPE_CHECKING
import geopandas as gpd
from python_pkg.geo_data._common import (
CACHE_DIR,
MIN_PEAK_ELEVATION,
_add_area_column,
_build_osiedla_geometry,
_ensure_cache_dir,
_extract_osiedla_rings,
_extract_polygon_from_element,
_extract_polygonal_geometry,
_overpass_query,
)
if TYPE_CHECKING:
from typing import Any
def get_polish_mountain_peaks() -> gpd.GeoDataFrame:
"""Get Polish mountain peaks, sorted by elevation descending.
Returns:
GeoDataFrame with mountain peak points and elevation.
"""
cache_path = CACHE_DIR / "polish_mountain_peaks.geojson"
if cache_path.exists():
gdf = gpd.read_file(cache_path)
return gdf.sort_values("elevation", ascending=False).reset_index(drop=True)
sys.stdout.write("Fetching mountain peaks data from OSM...\n")
query = """
[out:json][timeout:120];
area["ISO3166-1"="PL"]->.pl;
(
node["natural"="peak"]["name"]["ele"](area.pl);
);
out;
"""
data = _overpass_query(query)
features = []
seen_names: set[str] = set()
for element in data.get("elements", []):
if element.get("type") != "node":
continue
name = element.get("tags", {}).get("name", "")
ele_str = element.get("tags", {}).get("ele", "")
if not name or not ele_str or name in seen_names:
continue
with contextlib.suppress(ValueError):
elevation = float(ele_str.replace(",", ".").split()[0])
if elevation < MIN_PEAK_ELEVATION:
continue
seen_names.add(name)
features.append(
{
"type": "Feature",
"properties": {"name": name, "elevation": elevation},
"geometry": {
"type": "Point",
"coordinates": [element["lon"], element["lat"]],
},
}
)
_ensure_cache_dir()
geojson = {"type": "FeatureCollection", "features": features}
cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
sys.stdout.write(f"Cached {len(features)} mountain peaks.\n")
if not features:
msg = "No mountain peaks found in OSM data"
raise ValueError(msg)
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
return gdf.sort_values("elevation", ascending=False).reset_index(drop=True)
def get_polish_mountain_ranges() -> gpd.GeoDataFrame:
"""Get Polish mountain ranges, sorted by area descending.
Returns:
GeoDataFrame with mountain range polygons.
"""
cache_path = CACHE_DIR / "polish_mountain_ranges.geojson"
if cache_path.exists():
gdf = gpd.read_file(cache_path)
# Fix invalid geometries from OSM data and extract only polygons
gdf["geometry"] = gdf.geometry.make_valid()
gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty]
if "area_km2" in gdf.columns:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
sys.stdout.write("Fetching mountain ranges data from OSM...\n")
query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["natural"="mountain_range"]["name"](area.pl);
way["natural"="mountain_range"]["name"](area.pl);
);
out geom;
"""
data = _overpass_query(query)
features: list[dict[str, Any]] = []
seen_names: set[str] = set()
min_ring_coords = 4
for element in data.get("elements", []):
name = element.get("tags", {}).get("name", "")
if not name or name in seen_names:
continue
if element.get("type") == "relation":
outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
if not outer_rings:
continue
geometry = _build_osiedla_geometry(outer_rings, inner_rings)
elif element.get("type") == "way" and "geometry" in element:
coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
if len(coords) < min_ring_coords:
continue
if coords[0] != coords[-1]:
coords.append(coords[0])
geometry = {"type": "Polygon", "coordinates": [coords]}
else:
continue
seen_names.add(name)
features.append(
{"type": "Feature", "properties": {"name": name}, "geometry": geometry}
)
_ensure_cache_dir()
geojson = {"type": "FeatureCollection", "features": features}
cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
sys.stdout.write(f"Cached {len(features)} mountain ranges.\n")
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
# Fix invalid geometries from OSM data and extract only polygons
gdf["geometry"] = gdf.geometry.make_valid()
gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty]
# Calculate area in km²
gdf_proj = gdf.to_crs("EPSG:2180") # Polish coordinate system
gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
def get_polish_national_parks() -> gpd.GeoDataFrame:
"""Get all 23 Polish national parks, sorted by area descending.
Returns:
GeoDataFrame with national park polygons.
"""
cache_path = CACHE_DIR / "polish_national_parks.geojson"
if cache_path.exists():
gdf = gpd.read_file(cache_path)
if "area_km2" in gdf.columns:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
sys.stdout.write("Fetching national parks data from OSM...\n")
query = """
[out:json][timeout:180];
area["ISO3166-1"="PL"]->.pl;
(
relation["boundary"="national_park"]["name"](area.pl);
relation["leisure"="nature_reserve"]["name"]["protect_class"="2"](area.pl);
);
out geom;
"""
data = _overpass_query(query)
features = []
seen_names: set[str] = set()
min_ring_coords = 4
for element in data.get("elements", []):
if element.get("type") != "relation":
continue
name = element.get("tags", {}).get("name", "")
if not name or name in seen_names:
continue
# Filter to only include "Park Narodowy" in name
if "Narodowy" not in name and "narodowy" not in name.lower():
continue
outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
if not outer_rings:
continue
seen_names.add(name)
features.append(
{
"type": "Feature",
"properties": {"name": name},
"geometry": _build_osiedla_geometry(outer_rings, inner_rings),
}
)
_ensure_cache_dir()
geojson = {"type": "FeatureCollection", "features": features}
cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
sys.stdout.write(f"Cached {len(features)} national parks.\n")
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
# Calculate area in km²
gdf_proj = gdf.to_crs("EPSG:2180")
gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
def get_polish_forests() -> gpd.GeoDataFrame:
"""Get major Polish forests (puszcze), sorted by area descending.
Returns:
GeoDataFrame with forest polygons.
"""
cache_path = CACHE_DIR / "polish_forests.geojson"
if cache_path.exists():
gdf = gpd.read_file(cache_path)
if "area_km2" in gdf.columns:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
sys.stdout.write("Fetching forests data from OSM...\n")
# Query for named forests, especially "Puszcza" type
query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
(
relation["natural"="wood"]["name"](area.pl);
relation["landuse"="forest"]["name"~"Puszcza|Bory|Las"](area.pl);
way["natural"="wood"]["name"~"Puszcza|Bory"](area.pl);
);
out geom;
"""
data = _overpass_query(query)
forest_keywords = ("Puszcza", "Bory", "Las ", "Lasy ")
features = []
seen_names: set[str] = set()
for element in data.get("elements", []):
name = element.get("tags", {}).get("name", "")
if not name or name in seen_names:
continue
if not any(keyword in name for keyword in forest_keywords):
continue
geometry = _extract_polygon_from_element(element)
if geometry is None:
continue
seen_names.add(name)
features.append(
{"type": "Feature", "properties": {"name": name}, "geometry": geometry}
)
_ensure_cache_dir()
geojson = {"type": "FeatureCollection", "features": features}
cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
sys.stdout.write(f"Cached {len(features)} forests.\n")
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
gdf = _add_area_column(gdf)
if len(gdf) > 0:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
def get_polish_nature_reserves() -> gpd.GeoDataFrame:
"""Get Polish nature reserves, sorted by area descending.
Returns:
GeoDataFrame with nature reserve polygons.
"""
cache_path = CACHE_DIR / "polish_nature_reserves.geojson"
if cache_path.exists():
gdf = gpd.read_file(cache_path)
if "area_km2" in gdf.columns:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
sys.stdout.write(
"Fetching nature reserves data from OSM (this may take a while)...\n"
)
query = """
[out:json][timeout:600];
area["ISO3166-1"="PL"]->.pl;
(
relation["leisure"="nature_reserve"]["name"](area.pl);
way["leisure"="nature_reserve"]["name"](area.pl);
relation["boundary"="protected_area"]["protect_class"="4"]["name"](area.pl);
);
out geom;
"""
data = _overpass_query(query)
features = []
seen_names: set[str] = set()
for element in data.get("elements", []):
name = element.get("tags", {}).get("name", "")
if not name or name in seen_names:
continue
geometry = _extract_polygon_from_element(element)
if geometry is None:
continue
seen_names.add(name)
features.append(
{"type": "Feature", "properties": {"name": name}, "geometry": geometry}
)
_ensure_cache_dir()
geojson = {"type": "FeatureCollection", "features": features}
cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
sys.stdout.write(f"Cached {len(features)} nature reserves.\n")
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
gdf = _add_area_column(gdf)
if len(gdf) > 0:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
def get_polish_landscape_parks() -> gpd.GeoDataFrame:
"""Get Polish landscape parks, sorted by area descending.
Returns:
GeoDataFrame with landscape park polygons.
"""
cache_path = CACHE_DIR / "polish_landscape_parks.geojson"
if cache_path.exists():
gdf = gpd.read_file(cache_path)
# Fix invalid geometries from OSM data and extract only polygons
gdf["geometry"] = gdf.geometry.make_valid()
gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
# Remove any rows where geometry extraction failed
gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty]
if "area_km2" in gdf.columns:
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf
sys.stdout.write("Fetching landscape parks data from OSM...\n")
query = """
[out:json][timeout:300];
area["ISO3166-1"="PL"]->.pl;
(
relation["boundary"="protected_area"]["protect_class"="5"]["name"](area.pl);
relation["leisure"="nature_reserve"]["name"~"Park Krajobrazowy"](area.pl);
);
out geom;
"""
data = _overpass_query(query)
features = []
seen_names: set[str] = set()
min_ring_coords = 4
for element in data.get("elements", []):
if element.get("type") != "relation":
continue
name = element.get("tags", {}).get("name", "")
if not name or name in seen_names:
continue
outer_rings, inner_rings = _extract_osiedla_rings(element, min_ring_coords)
if not outer_rings:
continue
seen_names.add(name)
features.append(
{
"type": "Feature",
"properties": {"name": name},
"geometry": _build_osiedla_geometry(outer_rings, inner_rings),
}
)
_ensure_cache_dir()
geojson = {"type": "FeatureCollection", "features": features}
cache_path.write_text(json.dumps(geojson, ensure_ascii=False))
sys.stdout.write(f"Cached {len(features)} landscape parks.\n")
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
# Fix invalid geometries from OSM data and extract only polygons
gdf["geometry"] = gdf.geometry.make_valid()
gdf["geometry"] = gdf.geometry.apply(_extract_polygonal_geometry)
# Remove any rows where geometry extraction failed
gdf = gdf[gdf.geometry.notna() & ~gdf.geometry.is_empty]
if len(gdf) > 0:
gdf_proj = gdf.to_crs("EPSG:2180")
gdf["area_km2"] = gdf_proj.geometry.area / 1_000_000
return gdf.sort_values("area_km2", ascending=False).reset_index(drop=True)
return gdf