mirror of
https://github.com/kuhyx/steam-backlog-enforcer.git
synced 2026-07-04 12:03:13 +02:00
User can now pick any owned game by Steam app_id via `pick-manual <id>`. The script resolves the game name, asks for YES confirmation, then locks all other commands for 14 days or until the game is 100% complete. Post-assignment steps (uninstall others, install, hide library) mirror the automatic pick flow. Lock is checked before every command including add-exception. Also fixes pre-existing test failures in hltb, stats, and web_dataset modules and adds 100% coverage for all changed code. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
385 lines
12 KiB
Python
385 lines
12 KiB
Python
"""HowLongToBeat integration for estimating game completion times.
|
|
|
|
Fetches leisure completionist hour estimates from howlongtobeat.com with:
|
|
- direct API calls (bypassing the slow howlongtobeatpy per-request setup)
|
|
- single shared aiohttp session for all requests
|
|
- concurrent requests with configurable concurrency
|
|
- live progress reporting via callback
|
|
- incremental disk-cache saves so crashes don't lose work
|
|
- leisure time (upper-bound play time) from individual game pages
|
|
- DLC time aggregation (base game + all DLC leisure times combined)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
|
|
import aiohttp
|
|
|
|
from steam_backlog_enforcer._hltb_search import (
|
|
_fetch_batch,
|
|
_get_auth_info,
|
|
_get_hltb_search_url,
|
|
_search_one,
|
|
_SearchCtx,
|
|
)
|
|
from steam_backlog_enforcer._hltb_types import (
|
|
HLTB_BASE_URL,
|
|
MAX_CONCURRENT,
|
|
HLTBResult,
|
|
ProgressCb,
|
|
_HLTBExtras,
|
|
load_hltb_cache,
|
|
load_hltb_count_comp_cache,
|
|
load_hltb_game_id_cache,
|
|
load_hltb_leisure_100h_cache,
|
|
load_hltb_polls_cache,
|
|
load_hltb_rush_cache,
|
|
save_hltb_cache,
|
|
)
|
|
|
|
# Module-level logger named after this module; the fetch helpers below log
# progress, timing and warnings through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────
|
|
# Confidence-only batch fetch (no leisure/DLC detail pages)
|
|
# ──────────────────────────────────────────────────────────────
|
|
async def _fetch_batch_confidence_only(
    games: list[tuple[int, str]],
    cache: dict[int, float],
    polls: dict[int, int],
    progress_cb: ProgressCb | None,
    count_comp: dict[int, int] | None = None,
) -> list[HLTBResult]:
    """Run search-only HLTB lookups for ``games`` (no detail pages).

    Args:
        games: ``(app_id, name)`` pairs to search for.
        cache: hours mapping handed to every search through the shared context.
        polls: poll mapping handed to every search through the shared context.
        progress_cb: optional progress callback forwarded to the searches.
        count_comp: optional mapping forwarded to the searches; a fresh dict
            is substituted when it is omitted.

    Returns:
        The non-``None`` search results, or ``[]`` when no auth info could be
        obtained.
    """
    # Step 1: resolve the search endpoint (a synchronous call).
    search_url = _get_hltb_search_url()
    logger.info("HLTB search URL: %s", search_url)

    request_timeout = aiohttp.ClientTimeout(total=20, sock_read=15)

    # Step 2: obtain auth info through a dedicated, short-lived session.
    async with aiohttp.ClientSession(timeout=request_timeout) as init_session:
        auth = await _get_auth_info(search_url, init_session)
    if auth is None:
        logger.warning("Could not get HLTB auth info, aborting fetch.")
        return []
    logger.info("HLTB auth token acquired.")

    # Step 3: headers shared by every search request.
    user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0"
    )
    headers: dict[str, str] = {
        "content-type": "application/json",
        "accept": "*/*",
        "User-Agent": user_agent,
        "referer": "https://howlongtobeat.com/",
        "x-auth-token": auth.token,
    }
    if auth.hp_key:
        # The hp key/value pair is only sent when the auth info carries a key.
        headers.update({"x-hp-key": auth.hp_key, "x-hp-val": auth.hp_val})

    # Step 4: run every search through one persistent session, with the
    # semaphore and the connector both capped at MAX_CONCURRENT.
    limiter = asyncio.Semaphore(MAX_CONCURRENT)
    progress = {"done": 0, "found": 0}
    if count_comp is None:
        count_comp = {}

    pool = aiohttp.TCPConnector(limit=MAX_CONCURRENT, keepalive_timeout=30)
    async with aiohttp.ClientSession(
        timeout=request_timeout,
        connector=pool,
    ) as session:
        ctx = _SearchCtx(
            session=session,
            search_url=search_url,
            headers=headers,
            cache=cache,
            polls=polls,
            count_comp=count_comp,
            auth=auth,
            counter=progress,
            total=len(games),
            progress_cb=progress_cb,
        )
        outcomes = await asyncio.gather(
            *[_search_one(limiter, ctx, app_id, name) for app_id, name in games]
        )

    # Searches that produced nothing come back as None and are dropped.
    return [hit for hit in outcomes if hit is not None]
|
|
|
|
|
|
def fetch_hltb_times(
    games: list[tuple[int, str]],
    cache: dict[int, float] | None = None,
    polls: dict[int, int] | None = None,
    progress_cb: ProgressCb | None = None,
    extras: _HLTBExtras | None = None,
) -> list[HLTBResult]:
    """Blocking entry point that runs the async batch fetch for ``games``.

    Args:
        games: ``(app_id, name)`` pairs to look up.
        cache: hours mapping passed on to the batch fetch; a fresh dict is
            used when omitted.
        polls: poll mapping passed on to the batch fetch; a fresh dict is
            used when omitted.
        progress_cb: optional progress callback forwarded unchanged.
        extras: optional extra caches forwarded unchanged.

    Returns:
        Whatever ``_fetch_batch`` returns, or ``[]`` without starting an
        event loop when ``games`` is empty.
    """
    if not games:
        return []
    # Compare against None explicitly: a caller-supplied empty dict must be
    # passed through as-is rather than replaced.
    hours_cache = {} if cache is None else cache
    poll_cache = {} if polls is None else polls
    batch = _fetch_batch(
        games,
        hours_cache,
        poll_cache,
        progress_cb,
        extras=extras,
    )
    return asyncio.run(batch)
|
|
|
|
|
|
def fetch_hltb_confidence(
    games: list[tuple[int, str]],
    cache: dict[int, float] | None = None,
    polls: dict[int, int] | None = None,
    progress_cb: ProgressCb | None = None,
    count_comp: dict[int, int] | None = None,
) -> list[HLTBResult]:
    """Blocking entry point for the search-only (hours + confidence) fetch.

    Args:
        games: ``(app_id, name)`` pairs to look up.
        cache: hours mapping passed on to the fetch; fresh dict when omitted.
        polls: poll mapping passed on to the fetch; fresh dict when omitted.
        progress_cb: optional progress callback forwarded unchanged.
        count_comp: mapping passed on to the fetch; fresh dict when omitted.

    Returns:
        Whatever ``_fetch_batch_confidence_only`` returns, or ``[]`` without
        starting an event loop when ``games`` is empty.
    """
    if not games:
        return []
    # Compare against None explicitly: caller-supplied empty dicts must be
    # passed through as-is rather than replaced.
    hours_cache = {} if cache is None else cache
    poll_cache = {} if polls is None else polls
    comp_cache = {} if count_comp is None else count_comp
    batch = _fetch_batch_confidence_only(
        games,
        hours_cache,
        poll_cache,
        progress_cb,
        count_comp=comp_cache,
    )
    return asyncio.run(batch)
|
|
|
|
|
|
def fetch_hltb_times_cached(
    games: list[tuple[int, str]],
    progress_cb: ProgressCb | None = None,
) -> dict[int, float]:
    """Look up HLTB hours for ``games``, querying only ids absent from the cache.

    Args:
        games: list of (app_id, name) tuples to look up.
        progress_cb: optional callback(done, total, found, game_name).

    Returns:
        The hours cache dict (app_id -> completionist_hours) that was loaded
        from disk and handed to the fetch. It is the whole cache, not just
        the entries for ``games``.
    """
    cache = load_hltb_cache()
    polls = load_hltb_polls_cache()
    extras = _HLTBExtras(
        count_comp=load_hltb_count_comp_cache(),
        rush=load_hltb_rush_cache(),
        leisure_100h=load_hltb_leisure_100h_cache(),
    )
    pending = [(gid, title) for gid, title in games if gid not in cache]

    if not pending:
        logger.info("All %d games found in HLTB cache.", len(games))
        return cache

    n_pending = len(pending)
    logger.info(
        "Fetching HLTB data for %d uncached games (%d cached)...",
        n_pending,
        len(games) - n_pending,
    )
    started = time.monotonic()
    fetch_hltb_times(
        pending,
        cache=cache,
        polls=polls,
        progress_cb=progress_cb,
        extras=extras,
    )
    elapsed = time.monotonic() - started

    # Final save.
    save_hltb_cache(cache, polls, extras)

    # A game counts as found only when its cached hours are strictly positive.
    found = sum(cache.get(gid, -1) > 0 for gid, _ in pending)
    if elapsed > 0:
        rate = n_pending / elapsed
    else:
        rate = 0
    logger.info(
        "HLTB fetch done: %d/%d found in %.1fs (%.0f games/s)",
        found,
        n_pending,
        elapsed,
        rate,
    )
    return cache
|
|
|
|
|
|
def fetch_hltb_confidence_cached(
    games: list[tuple[int, str]],
    progress_cb: ProgressCb | None = None,
) -> dict[int, float]:
    """Search-only HLTB lookup for ``games``, skipping ids already in the cache.

    Args:
        games: list of (app_id, name) tuples to look up.
        progress_cb: optional progress callback forwarded to the fetch.

    Returns:
        The hours cache dict that was loaded from disk and handed to the
        fetch. It is the whole cache, not just the entries for ``games``.
    """
    cache = load_hltb_cache()
    polls = load_hltb_polls_cache()
    count_comp = load_hltb_count_comp_cache()
    pending = [(gid, title) for gid, title in games if gid not in cache]

    if not pending:
        logger.info("All %d games found in HLTB cache.", len(games))
        return cache

    n_pending = len(pending)
    logger.info(
        "Fetching HLTB confidence for %d uncached games (%d cached)...",
        n_pending,
        len(games) - n_pending,
    )
    started = time.monotonic()
    fetch_hltb_confidence(
        pending,
        cache=cache,
        polls=polls,
        progress_cb=progress_cb,
        count_comp=count_comp,
    )
    elapsed = time.monotonic() - started

    # NOTE(review): only count_comp is supplied in the extras here; confirm
    # that save_hltb_cache leaves the other extras caches on disk untouched.
    save_hltb_cache(cache, polls, _HLTBExtras(count_comp=count_comp))

    # A game counts as found only when its cached hours are strictly positive.
    found = sum(cache.get(gid, -1) > 0 for gid, _ in pending)
    if elapsed > 0:
        rate = n_pending / elapsed
    else:
        rate = 0
    logger.info(
        "HLTB confidence fetch done: %d/%d found in %.1fs (%.0f games/s)",
        found,
        n_pending,
        elapsed,
        rate,
    )
    return cache
|
|
|
|
|
|
def fetch_hltb_detail_missing(
    games: list[tuple[int, str]],
    progress_cb: ProgressCb | None = None,
) -> int:
    """Fetch HLTB detail (rush + leisure) for games that are missing it.

    Also backfills ``hltb_game_id`` for any game that already has rush/leisure
    data but whose HLTB game ID was never stored (e.g. from an old cache).
    Games with both rush data and a game_id are skipped entirely.  For the
    rest, temporarily removes them from the hours cache so ``fetch_hltb_times``
    will visit their detail pages.  Restores prior hours for any game the
    re-fetch doesn't find, including when ``games`` lists an app_id twice.

    Args:
        games: list of (app_id, name) tuples to check.
        progress_cb: optional progress callback.

    Returns:
        Number of entries from ``games`` that were missing rush-hour data and
        have it after the fetch; ``0`` when nothing needed fetching.
    """
    rush = load_hltb_rush_cache()
    game_id_cache = load_hltb_game_id_cache()
    missing_rush = [
        (app_id, name) for app_id, name in games if rush.get(app_id, -1) <= 0
    ]
    # Also re-search games that have rush data but no HLTB game ID yet so the
    # direct URL can be shown in stats output.
    missing_id_only = [
        (app_id, name)
        for app_id, name in games
        if rush.get(app_id, -1) > 0 and game_id_cache.get(app_id, 0) == 0
    ]
    missing = missing_rush + missing_id_only
    if not missing:
        return 0

    cache = load_hltb_cache()
    polls = load_hltb_polls_cache()
    extras = _HLTBExtras(
        count_comp=load_hltb_count_comp_cache(),
        rush=rush,
        leisure_100h=load_hltb_leisure_100h_cache(),
        hltb_game_id=game_id_cache,
    )

    # Remove from hours cache so fetch_hltb_times will visit the detail page.
    prior_hours: dict[int, float] = {}
    for app_id, _ in missing:
        # Keep only the first popped value.  If the same app_id appears twice
        # in ``games``, a second pop would return the -1.0 default and clobber
        # the real prior hours, making them impossible to restore below.
        if app_id not in prior_hours:
            prior_hours[app_id] = cache.pop(app_id, -1.0)

    n_rush = len(missing_rush)
    n_id = len(missing_id_only)
    if n_rush and n_id:
        logger.info(
            "Fetching HLTB detail for %d games missing rush/leisure data"
            " + %d games missing game ID...",
            n_rush,
            n_id,
        )
    elif n_rush:
        logger.info(
            "Fetching HLTB detail for %d games missing rush/leisure data...", n_rush
        )
    else:
        logger.info("Backfilling HLTB game ID for %d game(s)...", n_id)
    t0 = time.monotonic()
    fetch_hltb_times(
        missing,
        cache=cache,
        polls=polls,
        progress_cb=progress_cb,
        extras=extras,
    )
    elapsed = time.monotonic() - t0

    # Restore prior hours for games the detail fetch didn't re-find.
    for app_id, old_hours in prior_hours.items():
        if old_hours > 0 and cache.get(app_id, -1.0) <= 0:
            cache[app_id] = old_hours

    save_hltb_cache(cache, polls, extras)

    # Only games that were missing rush data count towards the result; the
    # ID-only backfills already had rush data before the fetch.
    fetched = sum(1 for app_id, _ in missing_rush if extras.rush.get(app_id, -1) > 0)
    rate = len(missing) / elapsed if elapsed > 0 else 0
    logger.info(
        "HLTB detail fetch done: %d/%d got rush data in %.1fs (%.0f games/s)",
        fetched,
        len(missing_rush),
        elapsed,
        rate,
    )
    return fetched
|
|
|
|
|
|
def get_hltb_submit_url(game_name: str) -> str | None:
    """Look up a game on HLTB and return its submit page URL.

    Args:
        game_name: Name of the game to search for.

    Returns:
        URL like ``https://howlongtobeat.com/submit/game/12345``,
        or ``None`` if the game wasn't found or ``game_name`` is empty or
        whitespace-only.
    """
    # A blank name cannot identify a game, so skip the HLTB lookup entirely.
    if not game_name or not game_name.strip():
        return None
    # app_id 0 is passed as a placeholder; only the first result's HLTB game
    # ID is used from the lookup.
    results = fetch_hltb_times([(0, game_name)])
    if results and results[0].hltb_game_id:
        return f"{HLTB_BASE_URL}/submit/game/{results[0].hltb_game_id}"
    return None
|