steam-backlog-enforcer/steam_backlog_enforcer/_scanning_confidence.py

254 lines
8.7 KiB
Python
Raw Permalink Normal View History

"""Confidence-checking and candidate-filtering helpers for scanning."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from steam_backlog_enforcer._hltb_types import (
_HLTBExtras,
load_hltb_cache,
load_hltb_count_comp_cache,
load_hltb_polls_cache,
save_hltb_cache,
)
from steam_backlog_enforcer.game_install import _echo
from steam_backlog_enforcer.hltb import fetch_hltb_confidence_cached
if TYPE_CHECKING:
from steam_backlog_enforcer.config import State
from steam_backlog_enforcer.steam_api import GameInfo
logger = logging.getLogger(__name__)
_MIN_COMP_100_POLLS = 3
_MIN_COUNT_COMP = 15
_MIN_CONFIDENCE_SUM = 18
def _apply_cached_confidence_to_candidates(candidates: list[GameInfo]) -> None:
"""Overlay cached confidence counters onto candidate game objects."""
polls_cache = load_hltb_polls_cache()
count_comp_cache = load_hltb_count_comp_cache()
for game in candidates:
if game.app_id in polls_cache:
game.comp_100_count = polls_cache[game.app_id]
if game.app_id in count_comp_cache:
game.count_comp = count_comp_cache[game.app_id]
def _confidence_fail_reasons(game: GameInfo) -> list[str]:
"""Return threshold-failure reasons for a game's HLTB confidence data."""
reasons: list[str] = []
if game.comp_100_count < _MIN_COMP_100_POLLS:
reasons.append(f"comp_100 polls {game.comp_100_count} < {_MIN_COMP_100_POLLS}")
if game.count_comp < _MIN_COUNT_COMP:
reasons.append(f"count_comp {game.count_comp} < {_MIN_COUNT_COMP}")
total = game.comp_100_count + game.count_comp
if total < _MIN_CONFIDENCE_SUM:
reasons.append(f"comp_100+count_comp {total} < {_MIN_CONFIDENCE_SUM}")
return reasons
def _refresh_candidate_confidence(game: GameInfo) -> None:
"""Refresh confidence metrics for one candidate when cache looks stale.
Refreshes when either metric is missing (0). A game with comp_100_count>0
but count_comp==0 means the detail-page all-playstyles count was not yet
populated (e.g. the cache predates that field).
"""
if game.comp_100_count > 0 and game.count_comp > 0:
return
_refresh_candidate_confidence_batch([game])
def _force_refresh_candidate_confidence(game: GameInfo) -> None:
"""Force-refresh one candidate's confidence metrics from HLTB."""
_refresh_candidate_confidence_batch([game], force=True)
def _refresh_candidate_confidence_batch(
candidates: list[GameInfo],
*,
force: bool = False,
) -> None:
"""Refresh missing confidence metrics for candidates in one HLTB batch.
This prevents O(N) one-game API loops when many snapshot entries predate
confidence fields and therefore have ``comp_100_count==0`` and
``count_comp==0``.
"""
missing = [
game
for game in candidates
if force or (game.comp_100_count == 0 and game.count_comp == 0)
]
if not missing:
return
refresh_slice = missing
if len(refresh_slice) == 1:
game = refresh_slice[0]
_echo(f" Refreshing HLTB confidence for {game.name} (AppID={game.app_id})...")
else:
_echo(f" Refreshing HLTB confidence for {len(refresh_slice)} candidate(s)...")
cache = load_hltb_cache()
polls = load_hltb_polls_cache()
count_comp = load_hltb_count_comp_cache()
app_ids = [game.app_id for game in refresh_slice]
names = [(game.app_id, game.name) for game in refresh_slice]
prior_hours = {aid: cache.get(aid, -1) for aid in app_ids}
for aid in app_ids:
cache.pop(aid, None)
polls.pop(aid, None)
count_comp.pop(aid, None)
save_hltb_cache(cache, polls, _HLTBExtras(count_comp=count_comp))
fetch_hltb_confidence_cached(names)
refreshed_hours = load_hltb_cache()
refreshed_polls = load_hltb_polls_cache()
refreshed_count_comp = load_hltb_count_comp_cache()
for aid, old_hours in prior_hours.items():
if old_hours > 0 and refreshed_hours.get(aid, -1) <= 0:
refreshed_hours[aid] = old_hours
save_hltb_cache(
refreshed_hours, refreshed_polls, _HLTBExtras(count_comp=refreshed_count_comp)
)
for game in refresh_slice:
game.comp_100_count = refreshed_polls.get(game.app_id, 0)
game.count_comp = refreshed_count_comp.get(game.app_id, 0)
def _filter_hltb_confident_candidates(
candidates: list[GameInfo],
) -> list[GameInfo]:
"""Keep only candidates that satisfy HLTB confidence thresholds."""
_refresh_candidate_confidence_batch(candidates)
kept: list[GameInfo] = []
for game in candidates:
reasons = _confidence_fail_reasons(game)
if reasons:
_echo(
f" Skipping {game.name} (AppID={game.app_id}): "
f"HLTB confidence too low ({'; '.join(reasons)})"
)
continue
kept.append(game)
return kept
def _candidate_passes_hltb_confidence(game: GameInfo) -> bool:
"""Return True if candidate passes confidence with cache-first behavior.
Only refreshes when confidence fields are missing (both zero), which keeps
normal runs cache-friendly and avoids repeated refetches for known
low-confidence entries.
"""
reasons = _confidence_fail_reasons(game)
if not reasons:
return True
# Re-check once when confidence fields are missing in cache.
_refresh_candidate_confidence(game)
reasons = _confidence_fail_reasons(game)
if reasons:
_echo(
f" Skipping {game.name} (AppID={game.app_id}): "
f"HLTB confidence too low ({'; '.join(reasons)})"
)
return False
return True
def _backfill_polls_for_finished(
state: State,
games: list[GameInfo],
) -> dict[int, int]:
"""Lazily fetch poll counts for already-finished games missing them.
Reads the polls cache, identifies finished games whose poll count is
still ``0`` (typically because the cache predates the polls schema),
and triggers a one-shot HLTB search to backfill them. Returns the
refreshed polls cache.
"""
polls_cache = load_hltb_polls_cache()
name_by_id = {g.app_id: g.name for g in games}
missing = [
(aid, name_by_id[aid])
for aid in state.finished_app_ids
if aid in name_by_id and polls_cache.get(aid, 0) == 0
]
if not missing:
return polls_cache
logger.info(
"Backfilling HLTB poll counts for %d already-finished games...",
len(missing),
)
# Force a fresh search by removing the hours entries we want to refetch.
# (fetch_hltb_times_cached skips entries already in the hours cache.)
cache = load_hltb_cache()
preserved_hours = {aid: cache[aid] for aid, _ in missing if aid in cache}
for aid, _name in missing:
cache.pop(aid, None)
save_hltb_cache(cache, polls_cache)
fetch_hltb_confidence_cached(missing)
# Restore any previously-known hours that the refetch may have replaced
# with a worse match (we trust prior leisure+dlc estimates).
refreshed_hours = load_hltb_cache()
refreshed_polls = load_hltb_polls_cache()
for aid, prior_hours in preserved_hours.items():
if prior_hours > 0 and refreshed_hours.get(aid, -1) <= 0:
refreshed_hours[aid] = prior_hours
save_hltb_cache(refreshed_hours, refreshed_polls)
return refreshed_polls
def _report_poll_confidence(
chosen: GameInfo,
games: list[GameInfo],
state: State,
) -> None:
"""Print HLTB poll-count confidence info for the just-assigned game.
Shows the chosen game's ``comp_100_count`` (number of polled
completionist times on HowLongToBeat) and the historical minimum
among the user's previously-finished games. Marks a new historical
low so the user can be skeptical of unreliable estimates.
"""
polls_cache = _backfill_polls_for_finished(state, games)
chosen_polls = polls_cache.get(chosen.app_id, chosen.comp_100_count)
chosen.comp_100_count = chosen_polls
finished_polls = [
(polls_cache[aid], aid)
for aid in state.finished_app_ids
if polls_cache.get(aid, 0) > 0
]
if not finished_polls:
_echo(f" HLTB confidence: {chosen_polls} polled completionist times")
return
min_polls, min_aid = min(finished_polls)
name_by_id = {g.app_id: g.name for g in games}
min_name = name_by_id.get(min_aid, f"AppID={min_aid}")
warning = ""
if 0 < chosen_polls < min_polls:
warning = " ⚠ NEW LOW — estimate may be unreliable"
elif chosen_polls == 0:
warning = " ⚠ no polls recorded — estimate may be unreliable"
_echo(f" HLTB confidence: {chosen_polls} polled completionist times{warning}")
_echo(f" Historical min among finished: {min_polls} ({min_name})")