testsAndMisc/python_pkg/steam_backlog_enforcer/hltb.py

126 lines
3.8 KiB
Python

"""HowLongToBeat integration for estimating game completion times."""
from __future__ import annotations
import asyncio
from dataclasses import dataclass
import json
import logging
from howlongtobeatpy import HowLongToBeat
from python_pkg.steam_backlog_enforcer.config import CONFIG_DIR
logger = logging.getLogger(__name__)
HLTB_CACHE_FILE = CONFIG_DIR / "hltb_cache.json"
MAX_CONCURRENT = 30
MIN_SIMILARITY = 0.5
@dataclass
class HLTBResult:
"""Result from a HowLongToBeat lookup."""
app_id: int
game_name: str
completionist_hours: float
similarity: float
def load_hltb_cache() -> dict[int, float]:
"""Load the persistent HLTB cache from disk.
Returns: dict mapping app_id -> completionist_hours.
"""
if HLTB_CACHE_FILE.exists():
try:
data = json.loads(HLTB_CACHE_FILE.read_text(encoding="utf-8"))
return {int(k): float(v) for k, v in data.items()}
except (json.JSONDecodeError, ValueError, OSError):
logger.warning("Corrupt HLTB cache, starting fresh.")
return {}
def save_hltb_cache(cache: dict[int, float]) -> None:
"""Save the HLTB cache to disk."""
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
try:
HLTB_CACHE_FILE.write_text(
json.dumps({str(k): v for k, v in cache.items()}, indent=2) + "\n",
encoding="utf-8",
)
except OSError:
logger.exception("Failed to save HLTB cache")
async def _search_one(
sem: asyncio.Semaphore, app_id: int, name: str
) -> HLTBResult | None:
"""Search HLTB for a single game."""
async with sem:
try:
results = await HowLongToBeat().async_search(name)
if results:
best = max(results, key=lambda r: r.similarity)
if best.similarity >= MIN_SIMILARITY:
comp = best.completionist
if comp and comp > 0:
return HLTBResult(
app_id=app_id,
game_name=name,
completionist_hours=comp,
similarity=best.similarity,
)
except (OSError, ValueError, TypeError, AttributeError) as e:
logger.debug("HLTB search failed for '%s': %s", name, e)
return None
async def _fetch_batch(
games: list[tuple[int, str]],
) -> list[HLTBResult]:
"""Fetch HLTB data for a batch of games concurrently."""
sem = asyncio.Semaphore(MAX_CONCURRENT)
tasks = [_search_one(sem, app_id, name) for app_id, name in games]
results = await asyncio.gather(*tasks)
return [r for r in results if r is not None]
def fetch_hltb_times(games: list[tuple[int, str]]) -> list[HLTBResult]:
"""Synchronous wrapper: fetch HLTB times for games."""
if not games:
return []
return asyncio.run(_fetch_batch(games))
def fetch_hltb_times_cached(
games: list[tuple[int, str]],
) -> dict[int, float]:
"""Fetch HLTB times, using disk cache for already-known games.
Returns: dict mapping app_id -> completionist_hours.
"""
cache = load_hltb_cache()
uncached = [(app_id, name) for app_id, name in games if app_id not in cache]
if uncached:
logger.info(
"Fetching HLTB data for %d uncached games (out of %d total)...",
len(uncached),
len(games),
)
results = fetch_hltb_times(uncached)
for r in results:
cache[r.app_id] = r.completionist_hours
# Also cache misses as -1 so we don't re-fetch them.
found_ids = {r.app_id for r in results}
for app_id, _ in uncached:
if app_id not in found_ids:
cache[app_id] = -1
save_hltb_cache(cache)
else:
logger.info("All %d games found in HLTB cache.", len(games))
return cache