mirror of
https://github.com/kuhyx/steam-backlog-enforcer.git
synced 2026-07-04 13:43:45 +02:00
fix: HLTB count_comp=0 bug, false matches, and ProtonDB log noise
- Extract count_comp from the detail page in _apply_detail_to_extras so the all-playstyles completion count is populated even when the search API returns 0 (Mini Ghost: 0 → 69, now passes confidence thresholds).
- Fix _refresh_candidate_confidence to trigger a re-fetch when count_comp==0 even if comp_100_count>0 (it was silently skipping stale partial entries).
- Filter colon-stripped fallback candidates (e.g. "Vox Populi" from "Vox Populi: Poland 2023") to full-edition or exact matches only, preventing cross-franchise false positives.
- Demote the "All N ProtonDB ratings found in cache" log to DEBUG to remove per-game noise from the scan output.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
551b8a4f95
commit
b8bd8459e4
@ -232,6 +232,29 @@ def _process_game_detail(
|
|||||||
return leisure, rush_h, l100
|
return leisure, rush_h, l100
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_detail_to_extras(
    app_id: int,
    game_data: dict[str, Any],
    dlc_rels: list[tuple[int, float]],
    dlc_hours_by_id: dict[int, float],
    extras: _HLTBExtras,
) -> float:
    """Update extras in-place from detail data; return leisure hours (or -1).

    Args:
        app_id: Key under which the values are stored in ``extras``.
        game_data: Detail payload for one game; its ``"game"`` entry is
            expected to be a list whose first element is a mapping.
        dlc_rels: Passed through unchanged to ``_process_game_detail``.
        dlc_hours_by_id: Passed through unchanged to ``_process_game_detail``.
        extras: Container whose ``rush``, ``leisure_100h`` and ``count_comp``
            mappings are updated in place; only positive values are written,
            so existing entries are never overwritten with zeros.

    Returns:
        The leisure value produced by ``_process_game_detail``.
    """
    leisure, rush_h, l100 = _process_game_detail(game_data, dlc_rels, dlc_hours_by_id)
    if rush_h > 0:
        extras.rush[app_id] = rush_h
    if l100 > 0:
        extras.leisure_100h[app_id] = l100

    # The search API sometimes returns count_comp=0 even when the detail page
    # has all-playstyles completion counts. Use the detail value when present.
    games_list = game_data.get("game")
    if isinstance(games_list, list) and games_list:
        first_game = games_list[0]
        # A malformed payload (e.g. a null first element) must not abort the
        # whole fetch with AttributeError; just skip the count in that case.
        if isinstance(first_game, dict):
            count_comp_detail = _as_positive_int(first_game.get("count_comp", 0))
            if count_comp_detail > 0:
                extras.count_comp[app_id] = count_comp_detail
    return leisure
|
||||||
|
|
||||||
|
|
||||||
async def _fetch_leisure_times(
|
async def _fetch_leisure_times(
|
||||||
search_results: list[HLTBResult],
|
search_results: list[HLTBResult],
|
||||||
cache: dict[int, float],
|
cache: dict[int, float],
|
||||||
@ -279,17 +302,13 @@ async def _fetch_leisure_times(
|
|||||||
done += 1
|
done += 1
|
||||||
if game_data is not None:
|
if game_data is not None:
|
||||||
dlc_rels = dlc_relationships_by_app.get(r.app_id, [])
|
dlc_rels = dlc_relationships_by_app.get(r.app_id, [])
|
||||||
leisure, rush_h, l100 = _process_game_detail(
|
leisure = _apply_detail_to_extras(
|
||||||
game_data, dlc_rels, dlc_hours_by_id
|
r.app_id, game_data, dlc_rels, dlc_hours_by_id, extras
|
||||||
)
|
)
|
||||||
if leisure > 0:
|
if leisure > 0:
|
||||||
r.completionist_hours = leisure
|
r.completionist_hours = leisure
|
||||||
cache[r.app_id] = leisure
|
cache[r.app_id] = leisure
|
||||||
found += 1
|
found += 1
|
||||||
if rush_h > 0:
|
|
||||||
extras.rush[r.app_id] = rush_h
|
|
||||||
if l100 > 0:
|
|
||||||
extras.leisure_100h[r.app_id] = l100
|
|
||||||
|
|
||||||
if progress_cb is not None:
|
if progress_cb is not None:
|
||||||
progress_cb(done, total, found, r.game_name)
|
progress_cb(done, total, found, r.game_name)
|
||||||
|
|||||||
@ -32,6 +32,13 @@ from steam_backlog_enforcer._hltb_types import (
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# When extended entry has ≥ this many times more hours than the exact match,
|
||||||
|
# prefer it even if its confidence count is lower.
|
||||||
|
_EXTENDED_DOMINANCE_RATIO = 4.0
|
||||||
|
# Minimum combined confidence for the dominance path (avoids picking entries
|
||||||
|
# that have almost no data at all).
|
||||||
|
_EXTENDED_MIN_CONFIDENCE = 3
|
||||||
|
|
||||||
|
|
||||||
# ──────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────
|
||||||
# HLTB API setup (done once, not per-request like the library)
|
# HLTB API setup (done once, not per-request like the library)
|
||||||
@ -326,12 +333,14 @@ def _find_best_extended(
|
|||||||
) -> tuple[dict[str, Any], float] | None:
|
) -> tuple[dict[str, Any], float] | None:
|
||||||
"""Find best extended entry ("Name: Subtitle" / "Name - Subtitle").
|
"""Find best extended entry ("Name: Subtitle" / "Name - Subtitle").
|
||||||
|
|
||||||
Skips subset entries (prologue, demo, etc.).
|
Skips subset entries (prologue, demo, etc.). Compilations ("compil")
|
||||||
|
are included because HLTB classifies multi-chapter collections that
|
||||||
|
share the base title as compilations (e.g. "FAITH: The Unholy Trinity").
|
||||||
"""
|
"""
|
||||||
best: tuple[dict[str, Any], float] | None = None
|
best: tuple[dict[str, Any], float] | None = None
|
||||||
for entry, sim in usable:
|
for entry, sim in usable:
|
||||||
game_type = str(entry.get("game_type", "")).lower()
|
game_type = str(entry.get("game_type", "")).lower()
|
||||||
if game_type not in ("", "game"):
|
if game_type not in ("", "game", "compil"):
|
||||||
continue
|
continue
|
||||||
entry_name = (entry.get("game_name") or "").lower()
|
entry_name = (entry.get("game_name") or "").lower()
|
||||||
if entry_name.startswith((lower + ":", lower + " -")):
|
if entry_name.startswith((lower + ":", lower + " -")):
|
||||||
@ -358,13 +367,20 @@ def _resolve_exact_vs_extended(
|
|||||||
extended_confidence = int(best_extended[0].get("comp_100_count", 0) or 0) + int(
|
extended_confidence = int(best_extended[0].get("comp_100_count", 0) or 0) + int(
|
||||||
best_extended[0].get("count_comp", 0) or 0
|
best_extended[0].get("count_comp", 0) or 0
|
||||||
)
|
)
|
||||||
# Prefer the extended entry only when it has strictly more hours
|
# Prefer the extended entry when it has more hours AND either:
|
||||||
# than the exact match AND at least as much confidence.
|
# (a) at least as much confidence (normal case), OR
|
||||||
# This lets "FAITH: The Unholy Trinity" (full game) beat
|
# (b) dominant hours ratio (>=4x) with minimal data — handles cases
|
||||||
# a low-confidence exact demo while preventing low-confidence
|
# like "FAITH: The Unholy Trinity" (17h, newer) vs "FAITH" 2017
|
||||||
# mods like "Celeste - Strawberry Jam" from beating
|
# (1.5h, older/more data) where the older exact match has
|
||||||
# the exact base game.
|
# accumulated more confidence simply by being on HLTB longer.
|
||||||
if extended_hours > exact_hours and extended_confidence >= exact_confidence:
|
dominates = (
|
||||||
|
exact_hours > 0
|
||||||
|
and extended_hours >= exact_hours * _EXTENDED_DOMINANCE_RATIO
|
||||||
|
and extended_confidence >= _EXTENDED_MIN_CONFIDENCE
|
||||||
|
)
|
||||||
|
if extended_hours > exact_hours and (
|
||||||
|
extended_confidence >= exact_confidence or dominates
|
||||||
|
):
|
||||||
return best_extended
|
return best_extended
|
||||||
return best_exact
|
return best_exact
|
||||||
if best_exact is not None:
|
if best_exact is not None:
|
||||||
@ -419,6 +435,26 @@ async def _search_one(
|
|||||||
continue
|
continue
|
||||||
data = await resp.json()
|
data = await resp.json()
|
||||||
candidates = _collect_candidates(query_name, data)
|
candidates = _collect_candidates(query_name, data)
|
||||||
|
# When we stripped ": subtitle" from the original name to
|
||||||
|
# get query_name, only keep full-edition entries (those
|
||||||
|
# whose HLTB name starts with query_name + ":"/"-") or
|
||||||
|
# exact name/alias matches. This prevents "Vox Populi"
|
||||||
|
# (stripped from "Vox Populi: Poland 2023") from falsely
|
||||||
|
# matching "Vox Populi Vox Dei 2".
|
||||||
|
if ":" in name and ":" not in query_name:
|
||||||
|
lower_q = query_name.lower()
|
||||||
|
candidates = [
|
||||||
|
(e, s)
|
||||||
|
for e, s in candidates
|
||||||
|
if (e.get("game_name") or "").lower() == lower_q
|
||||||
|
or (e.get("game_alias") or "").lower() == lower_q
|
||||||
|
or (e.get("game_name") or "")
|
||||||
|
.lower()
|
||||||
|
.startswith(lower_q + ":")
|
||||||
|
or (e.get("game_name") or "")
|
||||||
|
.lower()
|
||||||
|
.startswith(lower_q + " -")
|
||||||
|
]
|
||||||
best = _pick_best_hltb_entry(query_name, candidates)
|
best = _pick_best_hltb_entry(query_name, candidates)
|
||||||
if best is None:
|
if best is None:
|
||||||
continue
|
continue
|
||||||
|
|||||||
@ -55,10 +55,11 @@ def _confidence_fail_reasons(game: GameInfo) -> list[str]:
|
|||||||
def _refresh_candidate_confidence(game: GameInfo) -> None:
|
def _refresh_candidate_confidence(game: GameInfo) -> None:
|
||||||
"""Refresh confidence metrics for one candidate when cache looks stale.
|
"""Refresh confidence metrics for one candidate when cache looks stale.
|
||||||
|
|
||||||
Only refreshes when both metrics are missing (0), which typically means
|
Refreshes when either metric is missing (0). A game with comp_100_count>0
|
||||||
the game was cached before confidence fields were added.
|
but count_comp==0 means the detail-page all-playstyles count was not yet
|
||||||
|
populated (e.g. the cache predates that field).
|
||||||
"""
|
"""
|
||||||
if game.comp_100_count > 0 or game.count_comp > 0:
|
if game.comp_100_count > 0 and game.count_comp > 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
_refresh_candidate_confidence_batch([game])
|
_refresh_candidate_confidence_batch([game])
|
||||||
|
|||||||
@ -216,6 +216,6 @@ def fetch_protondb_ratings(
|
|||||||
_save_cache(cache)
|
_save_cache(cache)
|
||||||
logger.info("ProtonDB: fetched %d, total cached %d", len(fetched), len(cache))
|
logger.info("ProtonDB: fetched %d, total cached %d", len(fetched), len(cache))
|
||||||
else:
|
else:
|
||||||
logger.info("All %d ProtonDB ratings found in cache.", len(results))
|
logger.debug("All %d ProtonDB ratings found in cache.", len(results))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|||||||
@ -259,12 +259,71 @@ class TestSearchOne:
|
|||||||
# Set done to one less than _SAVE_INTERVAL so it triggers save
|
# Set done to one less than _SAVE_INTERVAL so it triggers save
|
||||||
|
|
||||||
ctx.counter["done"] = _SAVE_INTERVAL - 1
|
ctx.counter["done"] = _SAVE_INTERVAL - 1
|
||||||
with patch(
|
with patch("steam_backlog_enforcer._hltb_search.save_hltb_cache") as mock_save:
|
||||||
"steam_backlog_enforcer._hltb_search.save_hltb_cache"
|
|
||||||
) as mock_save:
|
|
||||||
asyncio.run(_search_one(asyncio.Semaphore(1), ctx, 440, "TF2"))
|
asyncio.run(_search_one(asyncio.Semaphore(1), ctx, 440, "TF2"))
|
||||||
mock_save.assert_called_once()
|
mock_save.assert_called_once()
|
||||||
|
|
||||||
|
def test_colon_strip_fallback_rejects_cross_franchise_match(self) -> None:
    """Reject an unrelated franchise returned for the colon-stripped query.

    The session yields an empty result first and then a single
    "Vox Populi Vox Dei 2" entry.  Searching for
    "Vox Populi: Poland 2023" must not accept that entry, so the
    search is expected to return ``None``.
    """
    unrelated_entry = {
        "game_name": "Vox Populi Vox Dei 2",
        "game_alias": "",
        "game_type": "game",
        "comp_100": 14400,
        "comp_100_count": 9,
        "count_comp": 57,
        "game_id": 99999,
    }
    # Responses are consumed in order: first empty, then the loose match.
    responses = [
        _FakeResponse(200, {"data": []}),
        _FakeResponse(200, {"data": [unrelated_entry]}),
    ]
    session = MagicMock()
    session.post.side_effect = responses
    ctx = _make_ctx(session)

    outcome = asyncio.run(
        _search_one(asyncio.Semaphore(1), ctx, 2590810, "Vox Populi: Poland 2023")
    )

    assert outcome is None
|
||||||
|
|
||||||
|
def test_colon_strip_fallback_accepts_full_edition(self) -> None:
    """Accept a full-edition entry returned for the colon-stripped query.

    The session yields an empty result first and then a single
    "Batman: Arkham Asylum" entry, whose name starts with the stripped
    name followed by ':'.  The search must return that entry.
    """
    full_edition_entry = {
        "game_name": "Batman: Arkham Asylum",
        "game_alias": "",
        "game_type": "game",
        "comp_100": 144000,
        "comp_100_count": 300,
        "count_comp": 5000,
        "game_id": 11111,
    }
    # Responses are consumed in order: first empty, then the full edition.
    responses = [
        _FakeResponse(200, {"data": []}),
        _FakeResponse(200, {"data": [full_edition_entry]}),
    ]
    session = MagicMock()
    session.post.side_effect = responses
    ctx = _make_ctx(session)

    outcome = asyncio.run(
        _search_one(asyncio.Semaphore(1), ctx, 35140, "Batman: Arkham Asylum")
    )

    assert outcome is not None
    assert outcome.game_name == "Batman: Arkham Asylum"
|
||||||
|
|
||||||
|
|
||||||
class TestFetchBatchHltb:
|
class TestFetchBatchHltb:
|
||||||
"""Tests for _fetch_batch (the hltb version)."""
|
"""Tests for _fetch_batch (the hltb version)."""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user