"""Read-only projection of cached data for the interactive web UI. Builds a compact, secrets-free dataset from the on-disk caches (snapshot, HLTB, ProtonDB, state) so a browser UI can filter games and estimate backlog completion times entirely client-side. This module performs **no network calls** — it only reads caches that previous ``scan``/``stats`` runs populated. The projection deliberately emits *every* incomplete, non-current, non-finished game with its raw HLTB-confidence counters and ProtonDB tiers, so the client can move its filter thresholds *below* the CLI defaults. The CLI default thresholds and a parity summary are included so the UI can show "matches the CLI" and so changes that break parity are easy to spot. """ from __future__ import annotations from dataclasses import asdict, dataclass, field from datetime import datetime, timezone from typing import Any from steam_backlog_enforcer._hltb_types import _read_raw_cache from steam_backlog_enforcer._scanning_confidence import ( _MIN_COMP_100_POLLS, _MIN_CONFIDENCE_SUM, _MIN_COUNT_COMP, ) from steam_backlog_enforcer.config import State, load_snapshot from steam_backlog_enforcer.protondb import ( MIN_PLAYABLE_TIER, ProtonDBRating, _load_cache, _rating_from_cache, ) from steam_backlog_enforcer.steam_api import GameInfo # Mirrors ``_stats._HOURS_PER_DAY_PRESETS`` but mutable/JSON-friendly. HOURS_PER_DAY_PRESETS = [2.0, 4.0, 6.0, 8.0] @dataclass class WebGame: """One incomplete candidate game, with raw filterable fields. Hour fields use ``-1`` to mean "no data" (matching the cache convention), so the client can choose to include or exclude unknown-length games. """ app_id: int name: str completion_pct: float playtime_minutes: int rush_hours: float leisure_hours: float worst_hours: float count_comp: int comp_100_count: int hltb_game_id: int protondb_tier: str protondb_trending_tier: str protondb_score: float @dataclass class WebStateInfo: """Pace inputs and current-assignment metadata for the UI.""" current_app_id: int | None current_game_name: str games_done: int games_done_since_start: int days_elapsed: int enforcement_started_at: str pace_games_per_day: float @dataclass class WebDefaults: """The CLI's hardcoded filter thresholds, surfaced as editable defaults.""" min_comp_100_polls: int min_count_comp: int min_confidence_sum: int min_playable_tier: str hours_per_day_presets: list[float] @dataclass class DefaultSummary: """Totals the CLI ``stats`` command would print at default thresholds. Used as a parity oracle: the client's own default-filtered totals must reproduce these numbers. """ qualifying: int rush_total: float leisure_total: float worst_total: float @dataclass class PaceVsHLTB: """Player pace calibrated against HLTB rush/leisure averages. Derived from completed games that have HLTB detail data. All ratio / interpolation fields use ``-1`` to mean "insufficient data", matching the cache convention used elsewhere. Fields: calibration_count: number of completed games used for calibration. ratio_vs_rush: actual_hours / rush_hours across calibration games. ratio_vs_leisure: actual_hours / leisure_hours (-1 if no leisure data). interpolation_t: position between rush (0.0) and leisure (1.0) speed. Negative means faster than rush; >1 means slower than leisure. -1 means insufficient data. player_style: human-readable style label. """ calibration_count: int ratio_vs_rush: float ratio_vs_leisure: float interpolation_t: float player_style: str @dataclass class WebDataset: """Full payload served to the browser.""" games: list[WebGame] state: WebStateInfo defaults: WebDefaults default_summary: DefaultSummary pace_vs_hltb: PaceVsHLTB | None generated_at: str = field(default="") def _worst_hours(game: GameInfo, cache_hours: float, leisure: float) -> float: """Replicate ``_stats`` worst-case selection exactly. worst = max of snapshot completionist hours, the HLTB hours-cache value, and the leisure-100% time — considering only positive values. """ snap_hours = game.completionist_hours if game.completionist_hours > 0 else -1 candidates = [v for v in (snap_hours, cache_hours, leisure) if v > 0] return max(candidates) if candidates else -1.0 def _passes_default_confidence(game: WebGame) -> bool: """True if the game clears all three CLI HLTB-confidence thresholds.""" if game.comp_100_count < _MIN_COMP_100_POLLS: return False if game.count_comp < _MIN_COUNT_COMP: return False return game.comp_100_count + game.count_comp >= _MIN_CONFIDENCE_SUM def _has_any_time(game: WebGame) -> bool: """True if the game has at least one positive time estimate.""" return game.worst_hours > 0 or game.rush_hours > 0 or game.leisure_hours > 0 def _build_games(games: list[GameInfo], exclude: set[int]) -> list[WebGame]: """Project incomplete, non-excluded games into compact rows (no network).""" raw = _read_raw_cache() protondb_cache = _load_cache() rows: list[WebGame] = [] for game in games: if game.is_complete or game.app_id in exclude: continue entry = raw.get(game.app_id, {}) rush = float(entry.get("rush_hours", -1)) leisure = float(entry.get("leisure_100h", -1)) cache_hours = float(entry.get("hours", -1)) count_comp = int(entry.get("count_comp", 0)) comp_100_count = int(entry.get("polls", 0)) hltb_game_id = int(entry.get("hltb_game_id", 0)) rating: ProtonDBRating = ( _rating_from_cache(game.app_id, protondb_cache[str(game.app_id)]) if str(game.app_id) in protondb_cache else ProtonDBRating(app_id=game.app_id) ) rows.append( WebGame( app_id=game.app_id, name=game.name, completion_pct=round(game.completion_pct, 1), playtime_minutes=game.playtime_minutes, rush_hours=rush, leisure_hours=leisure, worst_hours=_worst_hours(game, cache_hours, leisure), count_comp=count_comp, comp_100_count=comp_100_count, hltb_game_id=hltb_game_id, protondb_tier=rating.tier, protondb_trending_tier=rating.trending_tier, protondb_score=rating.score, ) ) return rows def _default_qualifying(rows: list[WebGame]) -> list[WebGame]: """Apply the exact CLI default filters (confidence + ProtonDB + has-data).""" qualifying: list[WebGame] = [] for game in rows: if not _passes_default_confidence(game): continue rating = ProtonDBRating( app_id=game.app_id, tier=game.protondb_tier, trending_tier=game.protondb_trending_tier, ) if not rating.is_playable: continue if not _has_any_time(game): continue qualifying.append(game) return qualifying def _sum_positive(rows: list[WebGame], attr: str) -> float: """Sum a positive-only hour attribute across rows (matches ``_sum_hours``).""" total = sum(getattr(g, attr) for g in rows if getattr(g, attr) > 0) return round(total, 1) def _default_summary(rows: list[WebGame]) -> DefaultSummary: """Compute the CLI parity totals at default thresholds.""" qualifying = _default_qualifying(rows) return DefaultSummary( qualifying=len(qualifying), rush_total=_sum_positive(qualifying, "rush_hours"), leisure_total=_sum_positive(qualifying, "leisure_hours"), worst_total=_sum_positive(qualifying, "worst_hours"), ) def count_complete_since_start(games: list[GameInfo], started_at: str) -> int: """Count complete games whose last achievement was unlocked on/after started_at. Games with no achievement timestamp data are excluded — their completion date is unknown, and they were most likely finished before Steam began recording unlock timestamps (i.e. before the enforcement period). Returns 0 when started_at is empty or unparseable. """ if not started_at: return 0 try: started = datetime.fromisoformat(started_at) except ValueError: return 0 started_ts = int(started.timestamp()) count = 0 for game in games: if not game.is_complete: continue achieved_times = [ a.unlock_time for a in game.achievements if a.achieved and a.unlock_time > 0 ] if not achieved_times: continue if max(achieved_times) >= started_ts: count += 1 return count def _state_info( state: State, games_done: int, games_done_since_start: int ) -> WebStateInfo: """Build pace metadata, mirroring ``_print_pace_scenario`` inputs.""" days_elapsed = 0 pace = 0.0 if state.enforcement_started_at: try: started = datetime.fromisoformat(state.enforcement_started_at) except ValueError: started = None if started is not None: now = datetime.now(timezone.utc) days_elapsed = max(1, (now - started).days) if games_done_since_start > 0: pace = round(games_done_since_start / days_elapsed, 4) return WebStateInfo( current_app_id=state.current_app_id, current_game_name=state.current_game_name, games_done=games_done, games_done_since_start=games_done_since_start, days_elapsed=days_elapsed, enforcement_started_at=state.enforcement_started_at, pace_games_per_day=pace, ) def _collect_calibration_pairs( raw_games: list[GameInfo], raw_cache: dict[int, dict[str, Any]], ) -> tuple[list[tuple[float, float]], list[tuple[float, float, float]]]: """Separate complete games into rush-only and rush+leisure sample sets.""" rush_pairs: list[tuple[float, float]] = [] both_pairs: list[tuple[float, float, float]] = [] for game in raw_games: if not game.is_complete or game.playtime_minutes <= 0: continue entry = raw_cache.get(game.app_id, {}) rush = float(entry.get("rush_hours", -1)) leisure = float(entry.get("leisure_100h", -1)) actual = game.playtime_minutes / 60.0 if rush > 0: rush_pairs.append((actual, rush)) if rush > 0 and leisure > 0: both_pairs.append((actual, rush, leisure)) return rush_pairs, both_pairs def _interpolate_from_both( both_pairs: list[tuple[float, float, float]], ) -> tuple[float, float]: """Return (ratio_vs_leisure, interpolation_t) from (actual, rush, leisure) triples. Returns -1.0 for interpolation_t when leisure <= rush (degenerate data). """ sum_actual = sum(p[0] for p in both_pairs) sum_rush = sum(p[1] for p in both_pairs) sum_leisure = sum(p[2] for p in both_pairs) ratio_vs_leisure = round(sum_actual / sum_leisure, 3) if sum_leisure > sum_rush: t = round((sum_actual - sum_rush) / (sum_leisure - sum_rush), 3) else: t = -1.0 return ratio_vs_leisure, t def _classify_player_style(interpolation_t: float, ratio_vs_rush: float) -> str: """Map calibration metrics to a player-style label.""" if interpolation_t != -1.0: if interpolation_t < 0: return "faster_than_rush" if interpolation_t <= 1.0: return "rush_to_leisure" return "slower_than_leisure" return "faster_than_rush" if ratio_vs_rush < 1.0 else "unknown" def compute_pace_vs_hltb( raw_games: list[GameInfo], raw_cache: dict[int, dict[str, Any]], ) -> PaceVsHLTB | None: """Compute player pace relative to HLTB rush/leisure averages. Uses completed games (100 % achievements, positive playtime) as calibration samples. Steam playtime includes idle time, so ratios > 1 are expected for most players. Args: raw_games: All games from the snapshot (completed + incomplete). raw_cache: The full HLTB cache (from ``_read_raw_cache()``). Returns: A ``PaceVsHLTB`` when at least one completed game has rush data, ``None`` when there is no calibration data at all. """ rush_pairs, both_pairs = _collect_calibration_pairs(raw_games, raw_cache) if not rush_pairs: return None ratio_vs_rush = round( sum(p[0] for p in rush_pairs) / sum(p[1] for p in rush_pairs), 3 ) if both_pairs: ratio_vs_leisure, interpolation_t = _interpolate_from_both(both_pairs) else: ratio_vs_leisure = -1.0 interpolation_t = -1.0 return PaceVsHLTB( calibration_count=len(rush_pairs), ratio_vs_rush=ratio_vs_rush, ratio_vs_leisure=ratio_vs_leisure, interpolation_t=interpolation_t, player_style=_classify_player_style(interpolation_t, ratio_vs_rush), ) def build_web_dataset(state: State) -> WebDataset: """Build the full web dataset from on-disk caches (no network calls). Args: state: The loaded enforcer state (current game, finished IDs, pace). Returns: A ``WebDataset`` with every incomplete candidate game, the CLI default thresholds, and a parity summary. Raises no exceptions for a missing snapshot — it returns an empty game list instead. """ snapshot = load_snapshot() raw_games = ( [GameInfo.from_snapshot(d) for d in snapshot] if snapshot is not None else [] ) games_done = sum(1 for g in raw_games if g.is_complete) games_done_since_start = count_complete_since_start( raw_games, state.enforcement_started_at ) exclude = set(state.finished_app_ids) if state.current_app_id is not None: exclude.add(state.current_app_id) rows = _build_games(raw_games, exclude) raw_cache = _read_raw_cache() pace_vs_hltb = compute_pace_vs_hltb(raw_games, raw_cache) return WebDataset( games=rows, state=_state_info(state, games_done, games_done_since_start), defaults=WebDefaults( min_comp_100_polls=_MIN_COMP_100_POLLS, min_count_comp=_MIN_COUNT_COMP, min_confidence_sum=_MIN_CONFIDENCE_SUM, min_playable_tier=MIN_PLAYABLE_TIER, hours_per_day_presets=list(HOURS_PER_DAY_PRESETS), ), default_summary=_default_summary(rows), pace_vs_hltb=pace_vs_hltb, generated_at=datetime.now(timezone.utc).isoformat(), ) def dataset_to_payload(dataset: WebDataset) -> dict[str, Any]: """Serialize a ``WebDataset`` to a JSON-ready dict.""" return asdict(dataset)