# Source path: testsAndMisc/python_pkg/geo_data/_warsaw_places.py
# (Web-viewer header removed: 190 lines, 5.8 KiB, Python.)

"""Warsaw streets, landmarks, and place data.

Functions for downloading and caching Warsaw streets, landmarks,
and other place-related geographic data.
"""
from __future__ import annotations
import json
import sys
import geopandas as gpd
from shapely.geometry import MultiLineString
from python_pkg.geo_data._common import CACHE_DIR, _ensure_cache_dir, _overpass_query
def get_warsaw_streets(min_length: int = 500) -> gpd.GeoDataFrame:
    """Get major Warsaw streets.

    Street segments are read from the local GeoJSON cache when it exists.
    Otherwise named primary, secondary, and tertiary ways are fetched with
    ``_overpass_query`` and written to the cache. In both cases the raw
    segments are merged per street name and filtered by
    ``_filter_streets_by_length``.

    Args:
        min_length: Minimum street length in meters.

    Returns:
        GeoDataFrame with street geometries.
    """
    cache_path = CACHE_DIR / "warsaw_streets.geojson"

    if cache_path.exists():
        # The cache stores unfiltered segments, so the filter is applied on
        # every call and different min_length values share one cache file.
        return _filter_streets_by_length(gpd.read_file(cache_path), min_length)

    sys.stdout.write("Fetching street data from OpenStreetMap...\n")

    query = """
[out:json][timeout:120];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
(
way["highway"="primary"]["name"](area.warsaw);
way["highway"="secondary"]["name"](area.warsaw);
way["highway"="tertiary"]["name"](area.warsaw);
);
out geom;
"""
    data = _overpass_query(query)

    min_coords = 2  # a LineString needs at least two points
    features = []
    for element in data.get("elements", []):
        if element.get("type") != "way" or "geometry" not in element:
            continue
        coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
        if len(coords) < min_coords:
            continue
        tags = element.get("tags", {})
        features.append(
            {
                "type": "Feature",
                "properties": {
                    "name": tags.get("name", "Unknown"),
                    "highway": tags.get("highway", ""),
                },
                "geometry": {"type": "LineString", "coordinates": coords},
            }
        )

    _ensure_cache_dir()
    geojson = {"type": "FeatureCollection", "features": features}
    # json.dumps escapes non-ASCII by default; the encoding is pinned anyway
    # so the cache file does not depend on the platform locale.
    cache_path.write_text(json.dumps(geojson), encoding="utf-8")
    sys.stdout.write(f"Cached {len(features)} street segments.\n")

    if features:
        gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
    else:
        # from_features([]) builds a frame without a geometry column, and
        # geopandas refuses to attach a CRS to such a frame. Build the empty
        # frame explicitly, as get_warsaw_landmarks does.
        gdf = gpd.GeoDataFrame(
            {"name": [], "highway": [], "geometry": []}, crs="EPSG:4326"
        )
    return _filter_streets_by_length(gdf, min_length)
def _filter_streets_by_length(
    gdf: gpd.GeoDataFrame, min_length: int
) -> gpd.GeoDataFrame:
    """Filter and merge streets by name, keeping only those above min_length.

    Segments sharing a name are combined into one geometry (a
    ``MultiLineString`` when there is more than one segment). Lengths are
    measured after re-projecting from EPSG:4326 to EPSG:2180. Rows whose
    name is missing, empty, not a string, or ``"Unknown"`` are ignored, as
    are rows without a geometry.

    Args:
        gdf: GeoDataFrame with street segments.
        min_length: Minimum length in meters.

    Returns:
        GeoDataFrame with merged streets, sorted by length (longest first),
        with columns ``name``, ``geometry`` and ``length_m``. When no street
        qualifies, an empty frame with the same columns and CRS is returned.
    """
    # Group segment geometries by street name; dicts keep first-seen order.
    segments_by_name: dict[str, list] = {}
    if not gdf.empty and "name" in gdf.columns:
        for name, geometry in zip(gdf["name"], gdf.geometry):
            # Names read back from a file may be None/NaN rather than str.
            if not isinstance(name, str) or not name or name == "Unknown":
                continue
            if geometry is None:
                continue
            segments_by_name.setdefault(name, []).append(geometry)

    result_rows = []
    if segments_by_name:
        merged = [
            parts[0] if len(parts) == 1 else MultiLineString(parts)
            for parts in segments_by_name.values()
        ]
        # Re-project every merged street in one pass instead of building a
        # one-row GeoDataFrame per street. EPSG:2180 is the Polish
        # coordinate system, so .length is used as the length in meters.
        lengths = gpd.GeoSeries(merged, crs="EPSG:4326").to_crs("EPSG:2180").length
        result_rows = [
            {"name": name, "geometry": geometry, "length_m": length}
            for name, geometry, length in zip(segments_by_name, merged, lengths)
            if length >= min_length
        ]

    if not result_rows:
        # Keep the schema and CRS stable so callers can index columns
        # without special-casing an empty result.
        return gpd.GeoDataFrame(
            {"name": [], "geometry": [], "length_m": []}, crs="EPSG:4326"
        )

    # Sort by length (longest first); the sort is stable for equal lengths.
    result_rows.sort(key=lambda row: row["length_m"], reverse=True)
    return gpd.GeoDataFrame(result_rows, crs="EPSG:4326")
def get_warsaw_landmarks() -> gpd.GeoDataFrame:
    """Get named Warsaw landmarks as point features.

    The cached GeoJSON file is returned when it exists. Otherwise named
    museums, attractions, and monuments are requested with
    ``_overpass_query``, de-duplicated by name (first occurrence wins),
    written to the cache, and returned.

    Returns:
        GeoDataFrame with landmark points and ``name`` / ``type`` columns.
    """
    cache_file = CACHE_DIR / "warsaw_landmarks.geojson"
    if cache_file.exists():
        return gpd.read_file(cache_file)

    sys.stdout.write("Fetching landmark data...\n")

    # Simplified query - just museums and major attractions
    query = """
[out:json][timeout:60];
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
(
node["tourism"="museum"]["name"](area.warsaw);
node["tourism"="attraction"]["name"](area.warsaw);
node["historic"="monument"]["name"](area.warsaw);
way["tourism"="museum"]["name"](area.warsaw);
way["tourism"="attraction"]["name"](area.warsaw);
);
out center;
"""
    response = _overpass_query(query)

    landmarks = []
    used_names: set[str] = set()
    for item in response.get("elements", []):
        tags = item.get("tags", {})
        label = tags.get("name", "")
        if not label or label in used_names:
            continue

        # Nodes carry their own position; other elements are usable only
        # when the response includes a "center" for them.
        if item.get("type") == "node":
            position = [item["lon"], item["lat"]]
        elif "center" in item:
            center = item["center"]
            position = [center["lon"], center["lat"]]
        else:
            continue

        used_names.add(label)
        # First non-empty classification tag wins, else a generic label.
        kind = (
            tags.get("tourism")
            or tags.get("historic")
            or tags.get("leisure")
            or "landmark"
        )
        landmarks.append(
            {
                "type": "Feature",
                "properties": {"name": label, "type": kind},
                "geometry": {"type": "Point", "coordinates": position},
            }
        )

    _ensure_cache_dir()
    collection = {"type": "FeatureCollection", "features": landmarks}
    cache_file.write_text(json.dumps(collection))
    sys.stdout.write(f"Cached {len(landmarks)} landmarks.\n")

    if landmarks:
        return gpd.GeoDataFrame.from_features(landmarks, crs="EPSG:4326")

    # No features: build the empty frame explicitly with the expected columns.
    return gpd.GeoDataFrame(
        {"name": [], "type": [], "geometry": []}, crs="EPSG:4326"
    )