mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 18:03:07 +02:00
- Add comprehensive tests for all packages (3572 tests, 100% branch coverage) - Split oversized test files to stay under 500-line limit - Add per-file ruff ignores for test-appropriate suppressions - Fix _cache_decks.py to properly convert JSON lists to tuples - Add session-scoped conftest fixture for logging handler cleanup (Python 3.14) - Update ruff pre-commit hook to v0.15.2 - Add codespell ignore words for test data - Add generated output files to .gitignore
190 lines
5.8 KiB
Python
190 lines
5.8 KiB
Python
"""Warsaw streets, landmarks, and place data.
|
|
|
|
Functions for downloading and caching Warsaw streets, landmarks,
|
|
and other place-related geographic data.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
|
|
import geopandas as gpd
|
|
from shapely.geometry import MultiLineString
|
|
|
|
from python_pkg.geo_data._common import CACHE_DIR, _ensure_cache_dir, _overpass_query
|
|
|
|
|
|
def get_warsaw_streets(min_length: int = 500) -> gpd.GeoDataFrame:
|
|
"""Get major Warsaw streets.
|
|
|
|
Args:
|
|
min_length: Minimum street length in meters.
|
|
|
|
Returns:
|
|
GeoDataFrame with street geometries.
|
|
"""
|
|
cache_path = CACHE_DIR / "warsaw_streets.geojson"
|
|
|
|
if cache_path.exists():
|
|
gdf = gpd.read_file(cache_path)
|
|
# Filter by length if needed
|
|
return _filter_streets_by_length(gdf, min_length)
|
|
|
|
sys.stdout.write("Fetching street data from OpenStreetMap...\n")
|
|
query = """
|
|
[out:json][timeout:120];
|
|
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
|
|
(
|
|
way["highway"="primary"]["name"](area.warsaw);
|
|
way["highway"="secondary"]["name"](area.warsaw);
|
|
way["highway"="tertiary"]["name"](area.warsaw);
|
|
);
|
|
out geom;
|
|
"""
|
|
|
|
data = _overpass_query(query)
|
|
|
|
features = []
|
|
min_coords = 2
|
|
|
|
for element in data.get("elements", []):
|
|
if element.get("type") == "way" and "geometry" in element:
|
|
coords = [(p["lon"], p["lat"]) for p in element["geometry"]]
|
|
if len(coords) >= min_coords:
|
|
features.append(
|
|
{
|
|
"type": "Feature",
|
|
"properties": {
|
|
"name": element.get("tags", {}).get("name", "Unknown"),
|
|
"highway": element.get("tags", {}).get("highway", ""),
|
|
},
|
|
"geometry": {"type": "LineString", "coordinates": coords},
|
|
}
|
|
)
|
|
|
|
_ensure_cache_dir()
|
|
geojson = {"type": "FeatureCollection", "features": features}
|
|
cache_path.write_text(json.dumps(geojson))
|
|
|
|
sys.stdout.write(f"Cached {len(features)} street segments.\n")
|
|
|
|
gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|
|
return _filter_streets_by_length(gdf, min_length)
|
|
|
|
|
|
def _filter_streets_by_length(
|
|
gdf: gpd.GeoDataFrame, min_length: int
|
|
) -> gpd.GeoDataFrame:
|
|
"""Filter and merge streets by name, keeping only those above min_length.
|
|
|
|
Args:
|
|
gdf: GeoDataFrame with street segments.
|
|
min_length: Minimum length in meters.
|
|
|
|
Returns:
|
|
GeoDataFrame with merged streets, sorted by length (longest first).
|
|
"""
|
|
# Group by street name
|
|
streets: dict[str, list] = {}
|
|
for _, row in gdf.iterrows():
|
|
name = row.get("name", "Unknown")
|
|
if name and name != "Unknown":
|
|
if name not in streets:
|
|
streets[name] = []
|
|
streets[name].append(row.geometry)
|
|
|
|
# Merge and filter
|
|
result_rows = []
|
|
for name, geometries in streets.items():
|
|
merged = geometries[0] if len(geometries) == 1 else MultiLineString(geometries)
|
|
|
|
# Create temp GeoDataFrame for length calculation
|
|
temp_gdf = gpd.GeoDataFrame(geometry=[merged], crs="EPSG:4326")
|
|
temp_proj = temp_gdf.to_crs("EPSG:2180") # Polish coordinate system
|
|
length = temp_proj.geometry.length.iloc[0]
|
|
|
|
if length >= min_length:
|
|
result_rows.append({"name": name, "geometry": merged, "length_m": length})
|
|
|
|
# Sort by length (longest first)
|
|
result_rows.sort(key=lambda x: x["length_m"], reverse=True)
|
|
|
|
return gpd.GeoDataFrame(
|
|
result_rows,
|
|
crs="EPSG:4326" if result_rows else None,
|
|
)
|
|
|
|
|
|
def get_warsaw_landmarks() -> gpd.GeoDataFrame:
|
|
"""Get Warsaw landmarks (museums, monuments, parks, etc.).
|
|
|
|
Returns:
|
|
GeoDataFrame with landmark points.
|
|
"""
|
|
cache_path = CACHE_DIR / "warsaw_landmarks.geojson"
|
|
|
|
if cache_path.exists():
|
|
return gpd.read_file(cache_path)
|
|
|
|
sys.stdout.write("Fetching landmark data...\n")
|
|
# Simplified query - just museums and major attractions
|
|
query = """
|
|
[out:json][timeout:60];
|
|
area["name"="Warszawa"]["admin_level"="6"]->.warsaw;
|
|
(
|
|
node["tourism"="museum"]["name"](area.warsaw);
|
|
node["tourism"="attraction"]["name"](area.warsaw);
|
|
node["historic"="monument"]["name"](area.warsaw);
|
|
way["tourism"="museum"]["name"](area.warsaw);
|
|
way["tourism"="attraction"]["name"](area.warsaw);
|
|
);
|
|
out center;
|
|
"""
|
|
|
|
data = _overpass_query(query)
|
|
|
|
features = []
|
|
seen_names: set[str] = set()
|
|
|
|
for element in data.get("elements", []):
|
|
name = element.get("tags", {}).get("name", "")
|
|
if not name or name in seen_names:
|
|
continue
|
|
|
|
# Get coordinates
|
|
if element.get("type") == "node":
|
|
lon, lat = element["lon"], element["lat"]
|
|
elif "center" in element:
|
|
lon, lat = element["center"]["lon"], element["center"]["lat"]
|
|
else:
|
|
continue
|
|
|
|
seen_names.add(name)
|
|
landmark_type = (
|
|
element.get("tags", {}).get("tourism")
|
|
or element.get("tags", {}).get("historic")
|
|
or element.get("tags", {}).get("leisure")
|
|
or "landmark"
|
|
)
|
|
|
|
features.append(
|
|
{
|
|
"type": "Feature",
|
|
"properties": {"name": name, "type": landmark_type},
|
|
"geometry": {"type": "Point", "coordinates": [lon, lat]},
|
|
}
|
|
)
|
|
|
|
_ensure_cache_dir()
|
|
geojson = {"type": "FeatureCollection", "features": features}
|
|
cache_path.write_text(json.dumps(geojson))
|
|
|
|
sys.stdout.write(f"Cached {len(features)} landmarks.\n")
|
|
|
|
if not features:
|
|
return gpd.GeoDataFrame(
|
|
{"name": [], "type": [], "geometry": []}, crs="EPSG:4326"
|
|
)
|
|
return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
|