mirror of
https://github.com/kuhyx/steam-backlog-enforcer.git
synced 2026-07-04 11:43:12 +02:00
fix: HLTB count_comp=0 bug, false matches, and ProtonDB log noise
- Extract count_comp from detail page in _apply_detail_to_extras so the all-playstyles completion count is populated even when the search API returns 0 (Mini Ghost: 0 → 69, now passes confidence thresholds)
- Fix _refresh_candidate_confidence to trigger re-fetch when count_comp==0 even if comp_100_count>0 (was silently skipping stale partial entries)
- Filter colon-stripped fallback candidates (e.g. "Vox Populi" from "Vox Populi: Poland 2023") to full-edition or exact matches only, preventing cross-franchise false positives
- Demote "All N ProtonDB ratings found in cache" log to DEBUG to remove per-game noise from the scan output

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
551b8a4f95
commit
b8bd8459e4
@ -232,6 +232,29 @@ def _process_game_detail(
|
||||
return leisure, rush_h, l100
|
||||
|
||||
|
||||
def _apply_detail_to_extras(
    app_id: int,
    game_data: dict[str, Any],
    dlc_rels: list[tuple[int, float]],
    dlc_hours_by_id: dict[int, float],
    extras: _HLTBExtras,
) -> float:
    """Record one game's detail-page figures in ``extras``; return leisure hours (or -1).

    ``extras`` is mutated in place: the rush and leisure-100% hours produced
    by ``_process_game_detail`` are stored under ``app_id`` only when they
    are positive, and so is the detail page's ``count_comp``.
    """
    leisure, rush_hours, leisure_100 = _process_game_detail(
        game_data, dlc_rels, dlc_hours_by_id
    )
    if rush_hours > 0:
        extras.rush[app_id] = rush_hours
    if leisure_100 > 0:
        extras.leisure_100h[app_id] = leisure_100

    # The search API sometimes reports count_comp=0 although the detail page
    # carries an all-playstyles completion count, so prefer the detail value
    # whenever it is present.
    entries = game_data.get("game")
    if not isinstance(entries, list) or not entries:
        return leisure

    detail_count = _as_positive_int(entries[0].get("count_comp", 0))
    if detail_count > 0:
        extras.count_comp[app_id] = detail_count
    return leisure
|
||||
|
||||
|
||||
async def _fetch_leisure_times(
|
||||
search_results: list[HLTBResult],
|
||||
cache: dict[int, float],
|
||||
@ -279,17 +302,13 @@ async def _fetch_leisure_times(
|
||||
done += 1
|
||||
if game_data is not None:
|
||||
dlc_rels = dlc_relationships_by_app.get(r.app_id, [])
|
||||
leisure, rush_h, l100 = _process_game_detail(
|
||||
game_data, dlc_rels, dlc_hours_by_id
|
||||
leisure = _apply_detail_to_extras(
|
||||
r.app_id, game_data, dlc_rels, dlc_hours_by_id, extras
|
||||
)
|
||||
if leisure > 0:
|
||||
r.completionist_hours = leisure
|
||||
cache[r.app_id] = leisure
|
||||
found += 1
|
||||
if rush_h > 0:
|
||||
extras.rush[r.app_id] = rush_h
|
||||
if l100 > 0:
|
||||
extras.leisure_100h[r.app_id] = l100
|
||||
|
||||
if progress_cb is not None:
|
||||
progress_cb(done, total, found, r.game_name)
|
||||
|
||||
@ -32,6 +32,13 @@ from steam_backlog_enforcer._hltb_types import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# When extended entry has ≥ this many times more hours than the exact match,
|
||||
# prefer it even if its confidence count is lower.
|
||||
_EXTENDED_DOMINANCE_RATIO = 4.0
|
||||
# Minimum combined confidence for the dominance path (avoids picking entries
|
||||
# that have almost no data at all).
|
||||
_EXTENDED_MIN_CONFIDENCE = 3
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────
|
||||
# HLTB API setup (done once, not per-request like the library)
|
||||
@ -326,12 +333,14 @@ def _find_best_extended(
|
||||
) -> tuple[dict[str, Any], float] | None:
|
||||
"""Find best extended entry ("Name: Subtitle" / "Name - Subtitle").
|
||||
|
||||
Skips subset entries (prologue, demo, etc.).
|
||||
Skips subset entries (prologue, demo, etc.). Compilations ("compil")
|
||||
are included because HLTB classifies multi-chapter collections that
|
||||
share the base title as compilations (e.g. "FAITH: The Unholy Trinity").
|
||||
"""
|
||||
best: tuple[dict[str, Any], float] | None = None
|
||||
for entry, sim in usable:
|
||||
game_type = str(entry.get("game_type", "")).lower()
|
||||
if game_type not in ("", "game"):
|
||||
if game_type not in ("", "game", "compil"):
|
||||
continue
|
||||
entry_name = (entry.get("game_name") or "").lower()
|
||||
if entry_name.startswith((lower + ":", lower + " -")):
|
||||
@ -358,13 +367,20 @@ def _resolve_exact_vs_extended(
|
||||
extended_confidence = int(best_extended[0].get("comp_100_count", 0) or 0) + int(
|
||||
best_extended[0].get("count_comp", 0) or 0
|
||||
)
|
||||
# Prefer the extended entry only when it has strictly more hours
|
||||
# than the exact match AND at least as much confidence.
|
||||
# This lets "FAITH: The Unholy Trinity" (full game) beat
|
||||
# a low-confidence exact demo while preventing low-confidence
|
||||
# mods like "Celeste - Strawberry Jam" from beating
|
||||
# the exact base game.
|
||||
if extended_hours > exact_hours and extended_confidence >= exact_confidence:
|
||||
# Prefer the extended entry when it has more hours AND either:
|
||||
# (a) at least as much confidence (normal case), OR
|
||||
# (b) dominant hours ratio (>=4x) with minimal data — handles cases
|
||||
# like "FAITH: The Unholy Trinity" (17h, newer) vs "FAITH" 2017
|
||||
# (1.5h, older/more data) where the older exact match has
|
||||
# accumulated more confidence simply by being on HLTB longer.
|
||||
dominates = (
|
||||
exact_hours > 0
|
||||
and extended_hours >= exact_hours * _EXTENDED_DOMINANCE_RATIO
|
||||
and extended_confidence >= _EXTENDED_MIN_CONFIDENCE
|
||||
)
|
||||
if extended_hours > exact_hours and (
|
||||
extended_confidence >= exact_confidence or dominates
|
||||
):
|
||||
return best_extended
|
||||
return best_exact
|
||||
if best_exact is not None:
|
||||
@ -419,6 +435,26 @@ async def _search_one(
|
||||
continue
|
||||
data = await resp.json()
|
||||
candidates = _collect_candidates(query_name, data)
|
||||
# When we stripped ": subtitle" from the original name to
|
||||
# get query_name, only keep full-edition entries (those
|
||||
# whose HLTB name starts with query_name + ":"/"-") or
|
||||
# exact name/alias matches. This prevents "Vox Populi"
|
||||
# (stripped from "Vox Populi: Poland 2023") from falsely
|
||||
# matching "Vox Populi Vox Dei 2".
|
||||
if ":" in name and ":" not in query_name:
|
||||
lower_q = query_name.lower()
|
||||
candidates = [
|
||||
(e, s)
|
||||
for e, s in candidates
|
||||
if (e.get("game_name") or "").lower() == lower_q
|
||||
or (e.get("game_alias") or "").lower() == lower_q
|
||||
or (e.get("game_name") or "")
|
||||
.lower()
|
||||
.startswith(lower_q + ":")
|
||||
or (e.get("game_name") or "")
|
||||
.lower()
|
||||
.startswith(lower_q + " -")
|
||||
]
|
||||
best = _pick_best_hltb_entry(query_name, candidates)
|
||||
if best is None:
|
||||
continue
|
||||
|
||||
@ -55,10 +55,11 @@ def _confidence_fail_reasons(game: GameInfo) -> list[str]:
|
||||
def _refresh_candidate_confidence(game: GameInfo) -> None:
    """Refresh confidence metrics for one candidate when cache looks stale.

    Refreshes when either metric is missing (0). A game with comp_100_count>0
    but count_comp==0 means the detail-page all-playstyles count was not yet
    populated (e.g. the cache predates that field).
    """
    # Skip only when both counts are already present; a single missing count
    # is enough to trigger a refresh.
    if game.comp_100_count > 0 and game.count_comp > 0:
        return

    _refresh_candidate_confidence_batch([game])
|
||||
|
||||
@ -216,6 +216,6 @@ def fetch_protondb_ratings(
|
||||
_save_cache(cache)
|
||||
logger.info("ProtonDB: fetched %d, total cached %d", len(fetched), len(cache))
|
||||
else:
|
||||
logger.info("All %d ProtonDB ratings found in cache.", len(results))
|
||||
logger.debug("All %d ProtonDB ratings found in cache.", len(results))
|
||||
|
||||
return results
|
||||
|
||||
@ -259,12 +259,71 @@ class TestSearchOne:
|
||||
# Set done to one less than _SAVE_INTERVAL so it triggers save
|
||||
|
||||
ctx.counter["done"] = _SAVE_INTERVAL - 1
|
||||
with patch(
|
||||
"steam_backlog_enforcer._hltb_search.save_hltb_cache"
|
||||
) as mock_save:
|
||||
with patch("steam_backlog_enforcer._hltb_search.save_hltb_cache") as mock_save:
|
||||
asyncio.run(_search_one(asyncio.Semaphore(1), ctx, 440, "TF2"))
|
||||
mock_save.assert_called_once()
|
||||
|
||||
def test_colon_strip_fallback_rejects_cross_franchise_match(self) -> None:
    """Colon-stripped fallback must not match a different franchise loosely.

    The session answers with an empty result first and then with a single
    "Vox Populi Vox Dei 2" entry; searching for "Vox Populi: Poland 2023"
    must therefore yield no match.
    """
    unrelated_entry = {
        "game_name": "Vox Populi Vox Dei 2",
        "game_alias": "",
        "game_type": "game",
        "comp_100": 14400,
        "comp_100_count": 9,
        "count_comp": 57,
        "game_id": 99999,
    }
    responses = [
        _FakeResponse(200, {"data": []}),
        _FakeResponse(200, {"data": [unrelated_entry]}),
    ]
    session = MagicMock()
    session.post.side_effect = responses
    ctx = _make_ctx(session)

    outcome = asyncio.run(
        _search_one(asyncio.Semaphore(1), ctx, 2590810, "Vox Populi: Poland 2023")
    )

    assert outcome is None
|
||||
|
||||
def test_colon_strip_fallback_accepts_full_edition(self) -> None:
    """Colon-stripped fallback must still match a full-edition HLTB entry.

    The session answers with an empty result first and then with a single
    "Batman: Arkham Asylum" entry, whose name starts with the stripped
    name plus ':'; that entry must be returned.
    """
    full_edition_entry = {
        "game_name": "Batman: Arkham Asylum",
        "game_alias": "",
        "game_type": "game",
        "comp_100": 144000,
        "comp_100_count": 300,
        "count_comp": 5000,
        "game_id": 11111,
    }
    responses = [
        _FakeResponse(200, {"data": []}),
        _FakeResponse(200, {"data": [full_edition_entry]}),
    ]
    session = MagicMock()
    session.post.side_effect = responses
    ctx = _make_ctx(session)

    outcome = asyncio.run(
        _search_one(asyncio.Semaphore(1), ctx, 35140, "Batman: Arkham Asylum")
    )

    assert outcome is not None
    assert outcome.game_name == "Batman: Arkham Asylum"
|
||||
|
||||
|
||||
class TestFetchBatchHltb:
|
||||
"""Tests for _fetch_batch (the hltb version)."""
|
||||
|
||||
Loading…
Reference in New Issue
Block a user