fix(steam-backlog-enforcer): filter HLTB subset entries and refresh stale snapshot hours

- Add _SUBSET_SUFFIXES filter in _pick_best_hltb_entry to avoid
  matching prologue/demo/trial/lite/prelude entries (e.g. prevents
  'A Space for the Unbound - Prologue' from matching over full game)
- Fix stale completionist_hours in snapshot used during reassignment:
  refresh uncached shorter candidates from HLTB before comparing in
  _try_reassign_shorter_game
- Fix same stale-hours issue in _finalize_completion: load HLTB cache,
  refresh uncached shortlist, and apply cached hours before pick_next_game
- Add regression tests for all three fix paths (100% branch coverage)
This commit is contained in:
Krzysztof kuhy Rudnicki 2026-03-29 21:34:29 +02:00
parent 3299e273d9
commit 541897413e
5 changed files with 230 additions and 5 deletions

View File

@ -35,6 +35,18 @@ MAX_CONCURRENT = 60 # parallel requests to HLTB
_SAVE_INTERVAL = 50 # flush cache to disk every N results
MIN_SIMILARITY = 0.5
# Keywords that mark a subset release (prologue, demo, etc.).
# Used to avoid preferring "Game - Prologue" over "Game" when both exist.
# _pick_best_hltb_entry takes the lowercased entry name, drops the matched
# name prefix plus any leading " ", ":" or "-" characters, and skips the
# entry when the remainder starts with one of these keywords.
# NOTE(review): that check is a plain prefix test, so "lite" also matches
# "literature" and "demo" also matches "demon" -- confirm whether a
# word-boundary match is wanted before adding more keywords here.
_SUBSET_SUFFIXES = frozenset(
{
"prologue",
"demo",
"trial",
"lite",
"prelude",
}
)
# Type for progress callbacks: (done, total, found, game_name)
ProgressCb = Callable[[int, int, int, str], None]
@ -217,7 +229,32 @@ def _pick_best_hltb_entry(
for entry, sim in usable:
entry_name = (entry.get("game_name") or "").lower()
if entry_name.startswith((lower + ":", lower + " -")):
return entry, sim
suffix = entry_name[len(lower) :].lstrip(" :-")
if not any(suffix.startswith(kw) for kw in _SUBSET_SUFFIXES):
# Only prefer this extended entry when it has strictly more
# comp_100 than any exact-name match. This prevents
# "Killing Floor: Toy Master" (1.2 h) from beating
# "Killing Floor" (296 h) while still letting
# "FAITH: The Unholy Trinity" (7 h) beat "FAITH" (0.5 h demo).
extended_hours = entry.get("comp_100", 0)
best_exact = next(
(
(e, s)
for e, s in sorted(
usable,
key=lambda x: x[0].get("comp_100", 0),
reverse=True,
)
if (e.get("game_name") or "").lower() == lower
),
None,
)
if (
best_exact is not None
and best_exact[0].get("comp_100", 0) >= extended_hours
):
return best_exact
return entry, sim
# Fall back to highest similarity.
return max(usable, key=lambda x: x[1])
@ -285,6 +322,14 @@ async def _search_one(
if best is not None:
entry, sim = best
hours = round(entry["comp_100"] / 3600, 2)
logger.debug(
"HLTB match for '%s': '%s' (id=%s, comp_100=%s, sim=%.3f)",
name,
entry.get("game_name"),
entry.get("game_id"),
entry.get("comp_100"),
sim,
)
result = HLTBResult(
app_id=app_id,
game_name=name,

View File

@ -58,6 +58,7 @@ logger = logging.getLogger(__name__)
_LIST_DISPLAY_LIMIT = 50
_MIN_CLI_ARGS = 2
# Upper bound on how many uncached games _refresh_uncached_shortlist_hours
# sends to fetch_hltb_times_cached in a single call (shortest first).
_REASSIGN_REFRESH_LIMIT = 50
# ──────────────────────────────────────────────────────────────
@ -283,6 +284,47 @@ def cmd_unhide(config: Config, _state: State) -> None:
_echo("Done!")
def _apply_cached_hours_to_games(
    games: list[GameInfo],
    hltb_cache: dict[int, float],
) -> None:
    """Overwrite each game's hours with its cached HLTB value, if present.

    Any app id found in ``hltb_cache`` replaces the value already stored on
    the game, whatever the cached number is (so cached misses are copied
    over as well).  Games without a cache entry keep their current hours.

    The ``GameInfo`` objects are mutated in place; nothing is returned.
    """
    for info in games:
        if info.app_id not in hltb_cache:
            # No cached value for this game: leave the snapshot hours alone.
            continue
        info.completionist_hours = hltb_cache[info.app_id]
def _refresh_uncached_shortlist_hours(
    games: list[GameInfo],
    hltb_cache: dict[int, float],
    skip: set[int],
    *,
    upper_bound_hours: float | None = None,
) -> None:
    """Fetch fresh HLTB hours for the shortest games that have no cache entry.

    A game is eligible when it is not complete, its app id is neither in
    ``skip`` nor in ``hltb_cache``, its current ``completionist_hours`` is
    positive and, when ``upper_bound_hours`` is given, strictly below that
    bound.  Eligible games are ordered by their current hours and at most
    ``_REASSIGN_REFRESH_LIMIT`` of them are passed, as ``(app_id, name)``
    pairs, to ``fetch_hltb_times_cached``.

    The fetched mapping is merged into ``hltb_cache`` in place; the games
    themselves are not modified and nothing is returned.  No fetch happens
    when no game is eligible.
    """

    def _is_eligible(game: GameInfo) -> bool:
        if game.is_complete:
            return False
        if game.app_id in skip:
            return False
        if not game.completionist_hours > 0:
            return False
        if game.app_id in hltb_cache:
            return False
        return (
            upper_bound_hours is None
            or game.completionist_hours < upper_bound_hours
        )

    eligible = [game for game in games if _is_eligible(game)]
    # Shortest first, so the limit keeps the most likely next picks.
    eligible.sort(key=lambda game: game.completionist_hours)
    to_fetch = [
        (game.app_id, game.name)
        for game in eligible[:_REASSIGN_REFRESH_LIMIT]
    ]
    if not to_fetch:
        return
    hltb_cache.update(fetch_hltb_times_cached(to_fetch))
def _try_reassign_shorter_game(
hltb_cache: dict[int, float],
app_id: int,
@ -295,11 +337,14 @@ def _try_reassign_shorter_game(
if not snapshot_data:
return False
all_games = [GameInfo.from_snapshot(d) for d in snapshot_data]
for g in all_games:
cached_hours = hltb_cache.get(g.app_id, -1.0)
if cached_hours > 0:
g.completionist_hours = cached_hours
skip = set(config.skip_app_ids) | set(state.finished_app_ids)
_refresh_uncached_shortlist_hours(
all_games,
hltb_cache,
skip,
upper_bound_hours=hours,
)
_apply_cached_hours_to_games(all_games, hltb_cache)
candidates = [
g
for g in all_games
@ -342,6 +387,10 @@ def _finalize_completion(
return
games = [GameInfo.from_snapshot(d) for d in snapshot_data]
hltb_cache = load_hltb_cache()
skip = set(config.skip_app_ids) | set(state.finished_app_ids)
_refresh_uncached_shortlist_hours(games, hltb_cache, skip)
_apply_cached_hours_to_games(games, hltb_cache)
pick_next_game(games, state, config)
if state.current_app_id is None:

View File

@ -281,6 +281,60 @@ class TestPickBestHltbEntry:
assert result is not None
assert result[0]["game_type"] == "game"
def test_skips_prologue_subset(self) -> None:
    """The full game must be chosen even when a '- Prologue' entry exists."""
    full_game: dict[str, Any] = {
        "game_name": "A Space For The Unbound",
        "comp_100": 45000,
    }
    prologue_only: dict[str, Any] = {
        "game_name": "A Space for the Unbound - Prologue",
        "comp_100": 1680,
    }
    # The prologue is listed first so a naive prefix match would pick it.
    candidates = [(prologue_only, 0.9), (full_game, 0.95)]

    picked = _pick_best_hltb_entry("A Space for the Unbound", candidates)

    assert picked is not None
    assert picked[0]["game_name"] == "A Space For The Unbound"
def test_skips_demo_subset(self) -> None:
    """The full game must be chosen even when a ': Demo' entry exists."""
    full_game: dict[str, Any] = {"game_name": "MyGame", "comp_100": 36000}
    demo_only: dict[str, Any] = {"game_name": "MyGame: Demo", "comp_100": 1800}
    candidates = [(demo_only, 0.9), (full_game, 1.0)]

    picked = _pick_best_hltb_entry("MyGame", candidates)

    assert picked is not None
    assert picked[0]["game_name"] == "MyGame"
def test_still_prefers_full_edition_over_demo(self) -> None:
    """A subtitled entry that is not a subset keyword still wins.

    The exact-name entry has fewer hours than the subtitled one, so the
    subtitled entry is the expected pick.
    """
    bare_title: dict[str, Any] = {"game_name": "FAITH", "comp_100": 1800}
    subtitled: dict[str, Any] = {
        "game_name": "FAITH: The Unholy Trinity",
        "comp_100": 7200,
    }
    candidates = [(bare_title, 1.0), (subtitled, 0.8)]

    picked = _pick_best_hltb_entry("FAITH", candidates)

    assert picked is not None
    assert picked[0]["game_name"] == "FAITH: The Unholy Trinity"
def test_exact_match_beats_unrelated_subtitle(self) -> None:
    """An exact-name entry with more hours beats a subtitled entry.

    'Killing Floor: Toy Master' (1.2 h) shares the 'Killing Floor:' prefix
    but must not be chosen over 'Killing Floor' (296 h).
    """
    exact_entry: dict[str, Any] = {
        "game_name": "Killing Floor",
        "comp_100": 1065600,  # 296 h
    }
    subtitled_entry: dict[str, Any] = {
        "game_name": "Killing Floor: Toy Master",
        "comp_100": 4320,  # 1.2 h
    }
    candidates = [(subtitled_entry, 0.7), (exact_entry, 1.0)]

    picked = _pick_best_hltb_entry("Killing Floor", candidates)

    assert picked is not None
    assert picked[0]["game_name"] == "Killing Floor"
class _FakeResponse:
"""Async context manager mimicking aiohttp response."""

View File

@ -481,3 +481,43 @@ class TestTryReassignShorterGame:
Config(),
)
assert not result
def test_refreshes_stale_shorter_snapshot_entry(self) -> None:
    """A shorter snapshot game missing from the cache is fetched first.

    Only app 1 is cached, so app 2 ("Lacuna", 0.9 h in the snapshot) must
    be sent to ``fetch_hltb_times_cached`` before the reassignment runs.
    """
    snapshot_rows = [
        _snap(1, "Current", 10, 5, 20.1),
        _snap(2, "Lacuna", 10, 0, 0.9),
    ]
    current_state = State(current_app_id=1, current_game_name="Current")
    lacuna_refreshed = GameInfo(
        app_id=2,
        name="Lacuna",
        total_achievements=10,
        unlocked_achievements=0,
        playtime_minutes=60,
        completionist_hours=18.8,
    )

    with (
        patch(f"{PKG}.load_snapshot", return_value=snapshot_rows),
        patch(
            f"{PKG}.fetch_hltb_times_cached",
            return_value={2: 18.8},
        ) as fetch_mock,
        patch(
            f"{PKG}._pick_playable_candidate",
            return_value=lacuna_refreshed,
        ) as playable_mock,
        patch(f"{PKG}.pick_next_game"),
        patch(f"{PKG}._echo"),
    ):
        reassigned = _try_reassign_shorter_game(
            {1: 20.1},
            1,
            20.1,
            current_state,
            Config(),
        )

    assert reassigned
    fetch_mock.assert_called_once_with([(2, "Lacuna")])
    playable_mock.assert_called_once()

View File

@ -147,6 +147,43 @@ class TestFinalizeCompletion:
mock_pick.side_effect = set_2
_finalize_completion(config, state, "G", 1)
def test_refreshes_snapshot_hours_before_pick(self) -> None:
    """Games reach ``pick_next_game`` with cached and refreshed hours.

    App 2 is already cached and app 3 is not, so only app 3 should be
    fetched; both must carry the new hours when the picker sees them.
    """
    config = Config()
    state = State(current_app_id=1, current_game_name="G")
    snapshot_rows = [
        _snap(2, "A Space for the Unbound", 10, 0, 0.56),
        _snap(3, "Lacuna", 10, 0, 1.2),
    ]
    hours_seen: dict[int, float] = {}

    def record_hours(
        games: list[GameInfo],
        s: State,
        _c: object,
    ) -> None:
        hours_seen.update({g.app_id: g.completionist_hours for g in games})
        # Force early return path after pick_next_game.
        s.current_app_id = None

    with (
        patch(f"{PKG}._echo"),
        patch(f"{PKG}.load_snapshot", return_value=snapshot_rows),
        patch(f"{PKG}.load_hltb_cache", return_value={2: 20.05}),
        patch(
            f"{PKG}.fetch_hltb_times_cached",
            return_value={3: 18.81},
        ) as fetch_mock,
        patch(f"{PKG}.pick_next_game", side_effect=record_hours),
        patch.object(State, "save"),
    ):
        _finalize_completion(config, state, "G", 1)

    assert hours_seen[2] == 20.05
    assert hours_seen[3] == 18.81
    fetch_mock.assert_called_once_with([(3, "Lacuna")])
class TestEnforceOnDone:
"""Tests for _enforce_on_done."""