mirror of
https://github.com/kuhyx/steam-backlog-enforcer.git
synced 2026-07-04 12:03:13 +02:00
- Extract count_comp from detail page in _apply_detail_to_extras so the all-playstyles completion count is populated even when the search API returns 0 (Mini Ghost: 0 → 69, now passes confidence thresholds) - Fix _refresh_candidate_confidence to trigger re-fetch when count_comp==0 even if comp_100_count>0 (was silently skipping stale partial entries) - Filter colon-stripped fallback candidates (e.g. "Vox Populi" from "Vox Populi: Poland 2023") to full-edition or exact matches only, preventing cross-franchise false positives - Demote "All N ProtonDB ratings found in cache" log to DEBUG to remove per-game noise from the scan output Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
254 lines
8.7 KiB
Python
254 lines
8.7 KiB
Python
"""Confidence-checking and candidate-filtering helpers for scanning."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import TYPE_CHECKING
|
|
|
|
from steam_backlog_enforcer._hltb_types import (
|
|
_HLTBExtras,
|
|
load_hltb_cache,
|
|
load_hltb_count_comp_cache,
|
|
load_hltb_polls_cache,
|
|
save_hltb_cache,
|
|
)
|
|
from steam_backlog_enforcer.game_install import _echo
|
|
from steam_backlog_enforcer.hltb import fetch_hltb_confidence_cached
|
|
|
|
if TYPE_CHECKING:
|
|
from steam_backlog_enforcer.config import State
|
|
from steam_backlog_enforcer.steam_api import GameInfo
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Confidence thresholds checked by ``_confidence_fail_reasons``; a game is
# reported as failing for every threshold it falls below.
# Minimum acceptable ``comp_100_count`` for a game.
_MIN_COMP_100_POLLS = 3
# Minimum acceptable ``count_comp`` for a game.
_MIN_COUNT_COMP = 15
# Minimum acceptable value of ``comp_100_count + count_comp``.
_MIN_CONFIDENCE_SUM = 18
|
|
|
|
|
|
def _apply_cached_confidence_to_candidates(candidates: list[GameInfo]) -> None:
    """Copy cached HLTB confidence counters onto each candidate in place.

    The polls cache and the count_comp cache are each loaded once. For every
    candidate whose ``app_id`` is a key in a cache, the matching attribute
    (``comp_100_count`` for polls, ``count_comp`` for count_comp) is
    overwritten with the cached value. A candidate that is absent from a
    cache keeps whatever value it already had for that attribute.
    """
    # Tuple elements are evaluated left to right, so the polls cache is
    # loaded before the count_comp cache.
    overlays = (
        ("comp_100_count", load_hltb_polls_cache()),
        ("count_comp", load_hltb_count_comp_cache()),
    )
    for candidate in candidates:
        for attr_name, cached_values in overlays:
            if candidate.app_id in cached_values:
                setattr(candidate, attr_name, cached_values[candidate.app_id])
|
|
|
|
|
|
def _confidence_fail_reasons(game: GameInfo) -> list[str]:
    """List every HLTB confidence threshold that *game* falls below.

    Three values are checked, in this order: ``comp_100_count`` against
    ``_MIN_COMP_100_POLLS``, ``count_comp`` against ``_MIN_COUNT_COMP``, and
    their sum against ``_MIN_CONFIDENCE_SUM``. Each failed check contributes
    one human-readable message; an empty list means the game passes.
    """
    polls = game.comp_100_count
    completions = game.count_comp
    # (label, observed value, required minimum) -- order fixes message order.
    checks = (
        ("comp_100 polls", polls, _MIN_COMP_100_POLLS),
        ("count_comp", completions, _MIN_COUNT_COMP),
        ("comp_100+count_comp", polls + completions, _MIN_CONFIDENCE_SUM),
    )
    return [
        f"{label} {observed} < {minimum}"
        for label, observed, minimum in checks
        if observed < minimum
    ]
|
|
|
|
|
|
def _refresh_candidate_confidence(game: GameInfo) -> None:
    """Refresh confidence metrics for one candidate when cache looks stale.

    A metric that is not positive is treated as "not populated yet". The
    game is re-fetched when either metric is missing, so a partial entry with
    ``comp_100_count > 0`` but ``count_comp == 0`` (e.g. a cache that predates
    the detail-page all-playstyles count) is refreshed as well. When both
    metrics are positive the function returns without doing anything.

    The refreshed values are written back onto *game* by
    ``_refresh_candidate_confidence_batch``.
    """
    if game.comp_100_count > 0 and game.count_comp > 0:
        return

    # The batch helper's default filter only selects games whose metrics are
    # *both* zero, so a partial entry would be dropped there and never
    # re-fetched. Staleness has already been decided above; bypass the filter.
    _refresh_candidate_confidence_batch([game], force=True)
|
|
|
|
|
|
def _force_refresh_candidate_confidence(game: GameInfo) -> None:
    """Re-fetch HLTB confidence metrics for *game* unconditionally.

    Delegates to ``_refresh_candidate_confidence_batch`` with ``force=True``,
    so the game is refreshed regardless of its current metric values.
    """
    single_game_batch = [game]
    _refresh_candidate_confidence_batch(single_game_batch, force=True)
|
|
|
|
|
|
def _refresh_candidate_confidence_batch(
    candidates: list[GameInfo],
    *,
    force: bool = False,
) -> None:
    """Re-fetch HLTB confidence metrics for several candidates in one call.

    Without ``force``, only candidates whose ``comp_100_count`` and
    ``count_comp`` are both exactly 0 are refreshed; with ``force=True`` every
    candidate is. Batching avoids one fetch call per game when many entries
    lack confidence data.

    Steps, in order:

    1. Announce the refresh via ``_echo``.
    2. Evict the selected app ids from the hours, polls and count_comp caches
       and save, then call ``fetch_hltb_confidence_cached`` for them.
    3. Reload the caches; where a previously positive hours value came back
       missing or non-positive, put the old value back, then save again.
    4. Write the refreshed counters onto the candidate objects (0 when the
       reloaded cache has no entry).
    """
    if force:
        stale = list(candidates)
    else:
        stale = [
            candidate
            for candidate in candidates
            if candidate.comp_100_count == 0 and candidate.count_comp == 0
        ]
    if not stale:
        return

    if len(stale) > 1:
        _echo(f" Refreshing HLTB confidence for {len(stale)} candidate(s)...")
    else:
        only = stale[0]
        _echo(f" Refreshing HLTB confidence for {only.name} (AppID={only.app_id})...")

    hours_cache = load_hltb_cache()
    polls_cache = load_hltb_polls_cache()
    count_comp_cache = load_hltb_count_comp_cache()

    id_name_pairs = [(candidate.app_id, candidate.name) for candidate in stale]
    # Snapshot the known hours before eviction; -1 marks "no prior entry".
    hours_before = {
        app_id: hours_cache.get(app_id, -1) for app_id, _name in id_name_pairs
    }

    # Evict the entries before fetching.
    # NOTE(review): presumably the fetch helper skips ids that are still
    # cached -- confirm against fetch_hltb_confidence_cached.
    for app_id in hours_before:
        hours_cache.pop(app_id, None)
        polls_cache.pop(app_id, None)
        count_comp_cache.pop(app_id, None)
    save_hltb_cache(
        hours_cache, polls_cache, _HLTBExtras(count_comp=count_comp_cache)
    )

    fetch_hltb_confidence_cached(id_name_pairs)

    fresh_hours = load_hltb_cache()
    fresh_polls = load_hltb_polls_cache()
    fresh_count_comp = load_hltb_count_comp_cache()
    # Keep a previously known positive estimate if the refetch lost it.
    for app_id, previous in hours_before.items():
        if previous > 0 and fresh_hours.get(app_id, -1) <= 0:
            fresh_hours[app_id] = previous
    save_hltb_cache(
        fresh_hours, fresh_polls, _HLTBExtras(count_comp=fresh_count_comp)
    )

    for candidate in stale:
        candidate.comp_100_count = fresh_polls.get(candidate.app_id, 0)
        candidate.count_comp = fresh_count_comp.get(candidate.app_id, 0)
|
|
|
|
|
|
def _filter_hltb_confident_candidates(
    candidates: list[GameInfo],
) -> list[GameInfo]:
    """Return the candidates that meet every HLTB confidence threshold.

    The whole list is first passed to ``_refresh_candidate_confidence_batch``
    (default, non-forced mode). Each candidate is then checked with
    ``_confidence_fail_reasons``; failing ones are reported through ``_echo``
    and left out. Input order is preserved in the returned list.
    """
    _refresh_candidate_confidence_batch(candidates)

    confident: list[GameInfo] = []
    for candidate in candidates:
        failures = _confidence_fail_reasons(candidate)
        if not failures:
            confident.append(candidate)
            continue
        failure_text = "; ".join(failures)
        _echo(
            f" Skipping {candidate.name} (AppID={candidate.app_id}): "
            f"HLTB confidence too low ({failure_text})"
        )
    return confident
|
|
|
|
|
|
def _candidate_passes_hltb_confidence(game: GameInfo) -> bool:
    """Check one candidate against the confidence thresholds, cache first.

    The values already on *game* are checked first; if they pass, no fetch is
    attempted. Otherwise ``_refresh_candidate_confidence`` is given one chance
    to re-fetch (it decides by itself whether the data looks stale enough),
    and the thresholds are checked a second time. A game that still fails is
    reported through ``_echo`` and ``False`` is returned.
    """
    if not _confidence_fail_reasons(game):
        return True

    # Single retry: the failure may just be missing cache data.
    _refresh_candidate_confidence(game)
    remaining = _confidence_fail_reasons(game)
    if not remaining:
        return True

    failure_text = "; ".join(remaining)
    _echo(
        f" Skipping {game.name} (AppID={game.app_id}): "
        f"HLTB confidence too low ({failure_text})"
    )
    return False
|
|
|
|
|
|
def _backfill_polls_for_finished(
    state: State,
    games: list[GameInfo],
) -> dict[int, int]:
    """Fetch poll counts for finished games that have none cached yet.

    A finished game qualifies when its app id appears in *games* (needed to
    look up its name) and its cached poll count is absent or ``0``. If no
    game qualifies, the polls cache is returned as loaded. Otherwise the
    qualifying games are re-fetched in one ``fetch_hltb_confidence_cached``
    call and the reloaded polls cache is returned.

    Hours values that were known before the refetch are restored when the
    refetch leaves them missing or non-positive.
    """
    polls_cache = load_hltb_polls_cache()
    names = {game.app_id: game.name for game in games}
    to_fetch = [
        (app_id, names[app_id])
        for app_id in state.finished_app_ids
        if app_id in names and polls_cache.get(app_id, 0) == 0
    ]
    if not to_fetch:
        return polls_cache

    logger.info(
        "Backfilling HLTB poll counts for %d already-finished games...",
        len(to_fetch),
    )

    # Drop the hours entries for the games being refetched, remembering the
    # old values so they can be restored afterwards.
    # NOTE(review): presumably the fetch helper skips ids still present in the
    # hours cache -- confirm against fetch_hltb_confidence_cached.
    hours_cache = load_hltb_cache()
    remembered_hours = {}
    for app_id, _name in to_fetch:
        if app_id in hours_cache:
            remembered_hours[app_id] = hours_cache.pop(app_id)
    # NOTE(review): saved without an extras argument, unlike
    # _refresh_candidate_confidence_batch -- verify this does not discard the
    # count_comp cache.
    save_hltb_cache(hours_cache, polls_cache)

    fetch_hltb_confidence_cached(to_fetch)

    # Prefer the earlier estimate when the refetch produced nothing usable.
    hours_after = load_hltb_cache()
    polls_after = load_hltb_polls_cache()
    for app_id, earlier in remembered_hours.items():
        if earlier > 0 and hours_after.get(app_id, -1) <= 0:
            hours_after[app_id] = earlier
    save_hltb_cache(hours_after, polls_after)
    return polls_after
|
|
|
|
|
|
def _report_poll_confidence(
    chosen: GameInfo,
    games: list[GameInfo],
    state: State,
) -> None:
    """Echo how many HLTB completionist polls back the chosen game.

    The polls cache is first backfilled for finished games via
    ``_backfill_polls_for_finished``. The chosen game's poll count is taken
    from that cache (falling back to its current ``comp_100_count``) and
    stored back onto *chosen*. If any finished game has a positive poll
    count, the lowest such count is echoed for comparison, and a warning is
    appended when the chosen game has zero polls or sets a new low.
    """
    polls_by_app = _backfill_polls_for_finished(state, games)
    chosen_polls = polls_by_app.get(chosen.app_id, chosen.comp_100_count)
    chosen.comp_100_count = chosen_polls

    # Tuples compare by poll count first, then by app id to break ties.
    lowest = min(
        (
            (polls_by_app[app_id], app_id)
            for app_id in state.finished_app_ids
            if polls_by_app.get(app_id, 0) > 0
        ),
        default=None,
    )
    if lowest is None:
        # No history to compare against: report the bare count only.
        _echo(f" HLTB confidence: {chosen_polls} polled completionist times")
        return

    min_polls, min_aid = lowest
    names = {game.app_id: game.name for game in games}
    min_name = names.get(min_aid, f"AppID={min_aid}")

    if chosen_polls == 0:
        warning = " ⚠ no polls recorded — estimate may be unreliable"
    elif 0 < chosen_polls < min_polls:
        warning = " ⚠ NEW LOW — estimate may be unreliable"
    else:
        warning = ""

    _echo(f" HLTB confidence: {chosen_polls} polled completionist times{warning}")
    _echo(f" Historical min among finished: {min_polls} ({min_name})")
|