steam-backlog-enforcer/steam_backlog_enforcer/_scanning_confidence.py

"""Confidence-checking and candidate-filtering helpers for scanning."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from steam_backlog_enforcer._hltb_types import (
    _HLTBExtras,
    load_hltb_cache,
    load_hltb_count_comp_cache,
    load_hltb_polls_cache,
    save_hltb_cache,
)
from steam_backlog_enforcer.game_install import _echo
from steam_backlog_enforcer.hltb import fetch_hltb_confidence_cached

if TYPE_CHECKING:
    from steam_backlog_enforcer.config import State
    from steam_backlog_enforcer.steam_api import GameInfo

logger = logging.getLogger(__name__)

_MIN_COMP_100_POLLS = 3
_MIN_COUNT_COMP = 15
_MIN_CONFIDENCE_SUM = 18


def _apply_cached_confidence_to_candidates(candidates: list[GameInfo]) -> None:
    """Copy cached HLTB confidence counters onto each candidate, in place.

    ``comp_100_count`` is taken from the polls cache and ``count_comp`` from
    the count-comp cache.  An attribute is overwritten only when the matching
    cache has an entry for the candidate's ``app_id``; otherwise it keeps its
    current value.
    """
    # (attribute to overwrite, cache keyed by app_id) -- polls cache is loaded
    # first, then the count-comp cache.
    overlays = (
        ("comp_100_count", load_hltb_polls_cache()),
        ("count_comp", load_hltb_count_comp_cache()),
    )
    for candidate in candidates:
        app_id = candidate.app_id
        for attribute, cached in overlays:
            if app_id in cached:
                setattr(candidate, attribute, cached[app_id])


def _confidence_fail_reasons(game: GameInfo) -> list[str]:
    """List every HLTB confidence threshold that ``game`` fails to meet.

    Three checks are applied, in this order: ``comp_100_count`` against
    ``_MIN_COMP_100_POLLS``, ``count_comp`` against ``_MIN_COUNT_COMP``, and
    their sum against ``_MIN_CONFIDENCE_SUM``.

    Returns:
        One human-readable message per failed check; an empty list means the
        game satisfies all thresholds.
    """
    polls = game.comp_100_count
    completions = game.count_comp
    combined = polls + completions

    # (check failed?, message to report when it did)
    checks = (
        (
            polls < _MIN_COMP_100_POLLS,
            f"comp_100 polls {polls} < {_MIN_COMP_100_POLLS}",
        ),
        (
            completions < _MIN_COUNT_COMP,
            f"count_comp {completions} < {_MIN_COUNT_COMP}",
        ),
        (
            combined < _MIN_CONFIDENCE_SUM,
            f"comp_100+count_comp {combined} < {_MIN_CONFIDENCE_SUM}",
        ),
    )
    return [message for failed, message in checks if failed]


def _refresh_candidate_confidence(game: GameInfo) -> None:
    """Refresh confidence metrics for one candidate when cache looks stale.

    Refreshes when either metric is missing (not positive).  A game with
    comp_100_count>0 but count_comp==0 means the detail-page all-playstyles
    count was not yet populated (e.g. the cache predates that field).

    Nothing is fetched when both ``comp_100_count`` and ``count_comp`` are
    already positive.  Otherwise the game's metrics are refetched and the
    attributes on ``game`` are updated in place by the batch helper.
    """
    if game.comp_100_count > 0 and game.count_comp > 0:
        return

    # The staleness decision is made by the guard above.  The batch helper's
    # default filter only selects games whose metrics are *both* zero, so
    # without ``force=True`` a partially populated entry (one metric positive,
    # the other zero) would be filtered out again and never refreshed.
    _refresh_candidate_confidence_batch([game], force=True)


def _force_refresh_candidate_confidence(game: GameInfo) -> None:
    """Refetch HLTB confidence metrics for ``game`` regardless of cached values.

    Runs the batch refresher on a one-element batch with ``force=True``, which
    bypasses that helper's "metrics are missing" filter.
    """
    single_game_batch = [game]
    _refresh_candidate_confidence_batch(single_game_batch, force=True)


def _refresh_candidate_confidence_batch(
    candidates: list[GameInfo],
    *,
    force: bool = False,
) -> None:
    """Refetch HLTB confidence metrics for a group of candidates in one call.

    Without ``force`` only candidates whose ``comp_100_count`` and
    ``count_comp`` are both ``0`` are refreshed; with ``force=True`` every
    candidate is.  Batching avoids issuing one fetch per game when many
    entries lack confidence data.

    For the selected games the cached hours, polls and count-comp entries are
    dropped, the caches are saved, and ``fetch_hltb_confidence_cached`` is
    called once with all ``(app_id, name)`` pairs.  A previously positive
    hours value is put back when the reloaded cache has no positive value for
    that app id.  Finally each refreshed game's ``comp_100_count`` and
    ``count_comp`` are set from the reloaded caches (``0`` when absent).
    """
    if force:
        stale = list(candidates)
    else:
        stale = [
            candidate
            for candidate in candidates
            if candidate.comp_100_count == 0 and candidate.count_comp == 0
        ]
    if not stale:
        return

    if len(stale) == 1:
        only = stale[0]
        _echo(f"  Refreshing HLTB confidence for {only.name} (AppID={only.app_id})...")
    else:
        _echo(f"  Refreshing HLTB confidence for {len(stale)} candidate(s)...")

    hours_cache = load_hltb_cache()
    polls_cache = load_hltb_polls_cache()
    count_comp_cache = load_hltb_count_comp_cache()

    id_name_pairs = [(candidate.app_id, candidate.name) for candidate in stale]
    # Remember the hours known before the refetch (-1 = nothing cached).
    hours_before = {aid: hours_cache.get(aid, -1) for aid, _name in id_name_pairs}

    # Drop every cached entry for the games being refreshed and persist that
    # before fetching.
    for aid in hours_before:
        for store in (hours_cache, polls_cache, count_comp_cache):
            store.pop(aid, None)
    save_hltb_cache(hours_cache, polls_cache, _HLTBExtras(count_comp=count_comp_cache))

    fetch_hltb_confidence_cached(id_name_pairs)

    refreshed_hours = load_hltb_cache()
    refreshed_polls = load_hltb_polls_cache()
    refreshed_count_comp = load_hltb_count_comp_cache()

    # Keep a previously known positive estimate if the refetch did not
    # produce a positive one.
    for aid, old_hours in hours_before.items():
        lost_estimate = refreshed_hours.get(aid, -1) <= 0
        if old_hours > 0 and lost_estimate:
            refreshed_hours[aid] = old_hours
    save_hltb_cache(
        refreshed_hours,
        refreshed_polls,
        _HLTBExtras(count_comp=refreshed_count_comp),
    )

    for candidate in stale:
        app_id = candidate.app_id
        candidate.comp_100_count = refreshed_polls.get(app_id, 0)
        candidate.count_comp = refreshed_count_comp.get(app_id, 0)


def _filter_hltb_confident_candidates(
    candidates: list[GameInfo],
) -> list[GameInfo]:
    """Return the candidates that meet every HLTB confidence threshold.

    Candidates are first passed through one non-forced batch refresh, then
    each is checked with ``_confidence_fail_reasons``.  A "Skipping ..." line
    listing the failed thresholds is echoed for every rejected game.  The
    returned list keeps the input order.
    """
    _refresh_candidate_confidence_batch(candidates)

    confident: list[GameInfo] = []
    for candidate in candidates:
        failures = _confidence_fail_reasons(candidate)
        if not failures:
            confident.append(candidate)
            continue

        detail = "; ".join(failures)
        _echo(
            f"  Skipping {candidate.name} (AppID={candidate.app_id}): "
            f"HLTB confidence too low ({detail})"
        )
    return confident


def _candidate_passes_hltb_confidence(game: GameInfo) -> bool:
    """Tell whether ``game`` meets the HLTB confidence thresholds.

    The values already on ``game`` are checked first, so a game that passes
    triggers no refresh at all.  On failure ``_refresh_candidate_confidence``
    is called once (it decides whether a refetch actually happens) and the
    thresholds are evaluated a second time.

    Returns:
        ``True`` when the game passes, before or after the refresh attempt.
        ``False`` otherwise, after echoing a "Skipping ..." line with the
        failed thresholds.
    """
    if not _confidence_fail_reasons(game):
        return True

    # One retry: give the cache a chance to be repopulated, then re-evaluate.
    _refresh_candidate_confidence(game)
    failures = _confidence_fail_reasons(game)
    if not failures:
        return True

    detail = "; ".join(failures)
    _echo(
        f"  Skipping {game.name} (AppID={game.app_id}): "
        f"HLTB confidence too low ({detail})"
    )
    return False


def _backfill_polls_for_finished(
    state: State,
    games: list[GameInfo],
) -> dict[int, int]:
    """Fetch poll counts for finished games whose cached count is missing.

    A finished game needs backfilling when it appears in ``games`` (so its
    name is known) and the polls cache has no entry, or a ``0`` entry, for it.
    Those games' hours entries are removed from the hours cache, the caches
    are saved, and ``fetch_hltb_confidence_cached`` is called once for all of
    them.  Previously positive hours are restored afterwards when the
    reloaded cache holds no positive value for the same app id.

    Returns:
        The polls cache: the one loaded at the start when nothing needed
        backfilling, otherwise the one reloaded after the fetch.
    """
    known_polls = load_hltb_polls_cache()
    names = {g.app_id: g.name for g in games}

    to_backfill: list[tuple[int, str]] = []
    for aid in state.finished_app_ids:
        if aid not in names:
            continue
        if known_polls.get(aid, 0) == 0:
            to_backfill.append((aid, names[aid]))
    if not to_backfill:
        return known_polls

    logger.info(
        "Backfilling HLTB poll counts for %d already-finished games...",
        len(to_backfill),
    )

    # Remove the hours entries of the games being refetched, remembering the
    # old values.  NOTE(review): presumably the fetch helper skips app ids
    # that are still present in the hours cache -- confirm against
    # fetch_hltb_confidence_cached.
    hours_cache = load_hltb_cache()
    saved_hours = {}
    for aid, _name in to_backfill:
        if aid in hours_cache:
            saved_hours[aid] = hours_cache.pop(aid)
    save_hltb_cache(hours_cache, known_polls)

    fetch_hltb_confidence_cached(to_backfill)

    # Put back a previously known positive estimate wherever the refetch
    # left no positive hours value.
    refreshed_hours = load_hltb_cache()
    refreshed_polls = load_hltb_polls_cache()
    for aid, prior_hours in saved_hours.items():
        lost_estimate = refreshed_hours.get(aid, -1) <= 0
        if prior_hours > 0 and lost_estimate:
            refreshed_hours[aid] = prior_hours
    save_hltb_cache(refreshed_hours, refreshed_polls)
    return refreshed_polls


def _report_poll_confidence(
    chosen: GameInfo,
    games: list[GameInfo],
    state: State,
) -> None:
    """Echo poll-count confidence for the chosen game next to the user's history.

    The chosen game's poll count is read from the (backfilled) polls cache,
    falling back to its current ``comp_100_count``, and written back onto
    ``chosen.comp_100_count``.  When at least one finished game has a positive
    poll count, the smallest such count and the game it belongs to are echoed
    too, and the chosen game's line carries a warning if its count is ``0`` or
    lies below that historical minimum.
    """
    polls_by_id = _backfill_polls_for_finished(state, games)
    chosen_polls = polls_by_id.get(chosen.app_id, chosen.comp_100_count)
    chosen.comp_100_count = chosen_polls

    headline = f"    HLTB confidence: {chosen_polls} polled completionist times"

    # (poll count, app id) pairs, so min() picks the lowest count and breaks
    # ties on the smaller app id.
    polled_finished = [
        (polls_by_id[aid], aid)
        for aid in state.finished_app_ids
        if polls_by_id.get(aid, 0) > 0
    ]
    if not polled_finished:
        _echo(headline)
        return

    lowest_polls, lowest_aid = min(polled_finished)
    names = {g.app_id: g.name for g in games}
    lowest_name = names.get(lowest_aid, f"AppID={lowest_aid}")

    if chosen_polls == 0:
        suffix = "  ⚠ no polls recorded — estimate may be unreliable"
    elif 0 < chosen_polls < lowest_polls:
        suffix = "  ⚠ NEW LOW — estimate may be unreliable"
    else:
        suffix = ""

    _echo(headline + suffix)
    _echo(f"    Historical min among finished: {lowest_polls} ({lowest_name})")
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00			`"""Confidence-checking and candidate-filtering helpers for scanning."""`

			`from __future__ import annotations`

			`import logging`
			`from typing import TYPE_CHECKING`

chore: set up as standalone repo Extracted from testsAndMisc monorepo. Changes: - Rewrote imports from python_pkg.steam_backlog_enforcer.* → steam_backlog_enforcer.* - Moved run.sh, install.sh, README.md, service file to repo root - Added standalone pyproject.toml, requirements.txt, .pre-commit-config.yaml, .gitignore - Added GitHub Actions CI workflows (tests + pre-commit) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 07:21:29 +02:00			`from steam_backlog_enforcer._hltb_types import (`
steam_backlog_enforcer: fix stats command — show real Rush/Leisure/Worst data Four bugs fixed: - HLTB search returned 0 results for ~87 games with special chars (™, ®, &, standalone -, (Legacy), RHCP, etc.) — add _sanitize_search_name() and extend _build_search_variants() with Steam-suffix and edition stripping - fetch_hltb_detail_missing returned immediately because `app_id not in rush` was always False (all keys present with -1) — fix to `rush.get(id,-1) <= 0` - save_hltb_cache overwrote rush/leisure on confidence-only partial saves — now reads existing cache and preserves data when extras dicts are empty - _filter_qualifying_games excluded 57 games with stale snapshot hours (-1) even though HLTB hours cache had valid data — add cache fallback Result: stats shows Rush 64,670h / Leisure 136,807h / Worst 228,594h for all 785 qualifying games with full rush+leisure detail. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 07:02:48 +02:00			`_HLTBExtras,`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00			`load_hltb_cache,`
			`load_hltb_count_comp_cache,`
			`load_hltb_polls_cache,`
			`save_hltb_cache,`
			`)`
chore: set up as standalone repo Extracted from testsAndMisc monorepo. Changes: - Rewrote imports from python_pkg.steam_backlog_enforcer.* → steam_backlog_enforcer.* - Moved run.sh, install.sh, README.md, service file to repo root - Added standalone pyproject.toml, requirements.txt, .pre-commit-config.yaml, .gitignore - Added GitHub Actions CI workflows (tests + pre-commit) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 07:21:29 +02:00			`from steam_backlog_enforcer.game_install import _echo`
			`from steam_backlog_enforcer.hltb import fetch_hltb_confidence_cached`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00
			`if TYPE_CHECKING:`
chore: set up as standalone repo Extracted from testsAndMisc monorepo. Changes: - Rewrote imports from python_pkg.steam_backlog_enforcer.* → steam_backlog_enforcer.* - Moved run.sh, install.sh, README.md, service file to repo root - Added standalone pyproject.toml, requirements.txt, .pre-commit-config.yaml, .gitignore - Added GitHub Actions CI workflows (tests + pre-commit) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 07:21:29 +02:00			`from steam_backlog_enforcer.config import State`
			`from steam_backlog_enforcer.steam_api import GameInfo`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00
			`logger = logging.getLogger(__name__)`

			`_MIN_COMP_100_POLLS = 3`
			`_MIN_COUNT_COMP = 15`
			`_MIN_CONFIDENCE_SUM = 18`


			`def _apply_cached_confidence_to_candidates(candidates: list[GameInfo]) -> None:`
			`"""Overlay cached confidence counters onto candidate game objects."""`
			`polls_cache = load_hltb_polls_cache()`
			`count_comp_cache = load_hltb_count_comp_cache()`
			`for game in candidates:`
			`if game.app_id in polls_cache:`
			`game.comp_100_count = polls_cache[game.app_id]`
			`if game.app_id in count_comp_cache:`
			`game.count_comp = count_comp_cache[game.app_id]`


			`def _confidence_fail_reasons(game: GameInfo) -> list[str]:`
			`"""Return threshold-failure reasons for a game's HLTB confidence data."""`
			`reasons: list[str] = []`
			`if game.comp_100_count < _MIN_COMP_100_POLLS:`
			`reasons.append(f"comp_100 polls {game.comp_100_count} < {_MIN_COMP_100_POLLS}")`
			`if game.count_comp < _MIN_COUNT_COMP:`
			`reasons.append(f"count_comp {game.count_comp} < {_MIN_COUNT_COMP}")`

			`total = game.comp_100_count + game.count_comp`
			`if total < _MIN_CONFIDENCE_SUM:`
			`reasons.append(f"comp_100+count_comp {total} < {_MIN_CONFIDENCE_SUM}")`

			`return reasons`


			`def _refresh_candidate_confidence(game: GameInfo) -> None:`
			`"""Refresh confidence metrics for one candidate when cache looks stale.`

fix: HLTB count_comp=0 bug, false matches, and ProtonDB log noise - Extract count_comp from detail page in _apply_detail_to_extras so the all-playstyles completion count is populated even when the search API returns 0 (Mini Ghost: 0 → 69, now passes confidence thresholds) - Fix _refresh_candidate_confidence to trigger re-fetch when count_comp==0 even if comp_100_count>0 (was silently skipping stale partial entries) - Filter colon-stripped fallback candidates (e.g. "Vox Populi" from "Vox Populi: Poland 2023") to full-edition or exact matches only, preventing cross-franchise false positives - Demote "All N ProtonDB ratings found in cache" log to DEBUG to remove per-game noise from the scan output Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 20:40:51 +02:00			`Refreshes when either metric is missing (0). A game with comp_100_count>0`
			`but count_comp==0 means the detail-page all-playstyles count was not yet`
			`populated (e.g. the cache predates that field).`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00			`"""`
fix: HLTB count_comp=0 bug, false matches, and ProtonDB log noise - Extract count_comp from detail page in _apply_detail_to_extras so the all-playstyles completion count is populated even when the search API returns 0 (Mini Ghost: 0 → 69, now passes confidence thresholds) - Fix _refresh_candidate_confidence to trigger re-fetch when count_comp==0 even if comp_100_count>0 (was silently skipping stale partial entries) - Filter colon-stripped fallback candidates (e.g. "Vox Populi" from "Vox Populi: Poland 2023") to full-edition or exact matches only, preventing cross-franchise false positives - Demote "All N ProtonDB ratings found in cache" log to DEBUG to remove per-game noise from the scan output Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 20:40:51 +02:00			`if game.comp_100_count > 0 and game.count_comp > 0:`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00			`return`

			`_refresh_candidate_confidence_batch([game])`


			`def _force_refresh_candidate_confidence(game: GameInfo) -> None:`
			`"""Force-refresh one candidate's confidence metrics from HLTB."""`
			`_refresh_candidate_confidence_batch([game], force=True)`


			`def _refresh_candidate_confidence_batch(`
			`candidates: list[GameInfo],`
			`*,`
			`force: bool = False,`
			`) -> None:`
			`"""Refresh missing confidence metrics for candidates in one HLTB batch.`

			`This prevents O(N) one-game API loops when many snapshot entries predate`
			confidence fields and therefore have ``comp_100_count==0`` and
			``count_comp==0``.
			`"""`
			`missing = [`
			`game`
			`for game in candidates`
			`if force or (game.comp_100_count == 0 and game.count_comp == 0)`
			`]`
			`if not missing:`
			`return`

			`refresh_slice = missing`
			`if len(refresh_slice) == 1:`
			`game = refresh_slice[0]`
			`_echo(f" Refreshing HLTB confidence for {game.name} (AppID={game.app_id})...")`
			`else:`
			`_echo(f" Refreshing HLTB confidence for {len(refresh_slice)} candidate(s)...")`

			`cache = load_hltb_cache()`
			`polls = load_hltb_polls_cache()`
			`count_comp = load_hltb_count_comp_cache()`
			`app_ids = [game.app_id for game in refresh_slice]`
			`names = [(game.app_id, game.name) for game in refresh_slice]`
			`prior_hours = {aid: cache.get(aid, -1) for aid in app_ids}`

			`for aid in app_ids:`
			`cache.pop(aid, None)`
			`polls.pop(aid, None)`
			`count_comp.pop(aid, None)`
steam_backlog_enforcer: fix stats command — show real Rush/Leisure/Worst data Four bugs fixed: - HLTB search returned 0 results for ~87 games with special chars (™, ®, &, standalone -, (Legacy), RHCP, etc.) — add _sanitize_search_name() and extend _build_search_variants() with Steam-suffix and edition stripping - fetch_hltb_detail_missing returned immediately because `app_id not in rush` was always False (all keys present with -1) — fix to `rush.get(id,-1) <= 0` - save_hltb_cache overwrote rush/leisure on confidence-only partial saves — now reads existing cache and preserves data when extras dicts are empty - _filter_qualifying_games excluded 57 games with stale snapshot hours (-1) even though HLTB hours cache had valid data — add cache fallback Result: stats shows Rush 64,670h / Leisure 136,807h / Worst 228,594h for all 785 qualifying games with full rush+leisure detail. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 07:02:48 +02:00			`save_hltb_cache(cache, polls, _HLTBExtras(count_comp=count_comp))`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00
			`fetch_hltb_confidence_cached(names)`

			`refreshed_hours = load_hltb_cache()`
			`refreshed_polls = load_hltb_polls_cache()`
			`refreshed_count_comp = load_hltb_count_comp_cache()`
			`for aid, old_hours in prior_hours.items():`
			`if old_hours > 0 and refreshed_hours.get(aid, -1) <= 0:`
			`refreshed_hours[aid] = old_hours`
steam_backlog_enforcer: fix stats command — show real Rush/Leisure/Worst data Four bugs fixed: - HLTB search returned 0 results for ~87 games with special chars (™, ®, &, standalone -, (Legacy), RHCP, etc.) — add _sanitize_search_name() and extend _build_search_variants() with Steam-suffix and edition stripping - fetch_hltb_detail_missing returned immediately because `app_id not in rush` was always False (all keys present with -1) — fix to `rush.get(id,-1) <= 0` - save_hltb_cache overwrote rush/leisure on confidence-only partial saves — now reads existing cache and preserves data when extras dicts are empty - _filter_qualifying_games excluded 57 games with stale snapshot hours (-1) even though HLTB hours cache had valid data — add cache fallback Result: stats shows Rush 64,670h / Leisure 136,807h / Worst 228,594h for all 785 qualifying games with full rush+leisure detail. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-05-28 07:02:48 +02:00			`save_hltb_cache(`
			`refreshed_hours, refreshed_polls, _HLTBExtras(count_comp=refreshed_count_comp)`
			`)`
Split modules, fix tests, fix pre-commit batching - steam_backlog_enforcer: extract _hltb_search.py and _scanning_confidence.py; split oversized test files into _part2/3/4.py - screen_locker: extract _early_bird.py and _window_setup.py from screen_lock.py; fix patch targets in tests (screen_lock. -> _window_setup.*) - wake_alarm: use shutil.which('xset') to avoid S607; add TestDisplayHelpers tests - linux_configuration/usage_report: split into _parsing.py and _types.py; add bin/__init__.py (INP001); fix RUF002 (× -> x) - pre-commit: add require_serial: true to pytest-coverage hook to prevent file batching across 24 CPU cores (was causing 12 parallel partial-coverage runs) 2026-05-22 22:48:28 +02:00
			`for game in refresh_slice:`
			`game.comp_100_count = refreshed_polls.get(game.app_id, 0)`
			`game.count_comp = refreshed_count_comp.get(game.app_id, 0)`


			`def _filter_hltb_confident_candidates(`
			`candidates: list[GameInfo],`
			`) -> list[GameInfo]:`
			`"""Keep only candidates that satisfy HLTB confidence thresholds."""`
			`_refresh_candidate_confidence_batch(candidates)`

			`kept: list[GameInfo] = []`
			`for game in candidates:`
			`reasons = _confidence_fail_reasons(game)`
			`if reasons:`
			`_echo(`
			`f" Skipping {game.name} (AppID={game.app_id}): "`
			`f"HLTB confidence too low ({'; '.join(reasons)})"`
			`)`
			`continue`
			`kept.append(game)`
			`return kept`


			`def _candidate_passes_hltb_confidence(game: GameInfo) -> bool:`
			`"""Return True if candidate passes confidence with cache-first behavior.`

			`Only refreshes when confidence fields are missing (both zero), which keeps`
			`normal runs cache-friendly and avoids repeated refetches for known`
			`low-confidence entries.`
			`"""`
			`reasons = _confidence_fail_reasons(game)`
			`if not reasons:`
			`return True`

			`# Re-check once when confidence fields are missing in cache.`
			`_refresh_candidate_confidence(game)`
			`reasons = _confidence_fail_reasons(game)`
			`if reasons:`
			`_echo(`
			`f" Skipping {game.name} (AppID={game.app_id}): "`
			`f"HLTB confidence too low ({'; '.join(reasons)})"`
			`)`
			`return False`
			`return True`


			`def _backfill_polls_for_finished(`
			`state: State,`
			`games: list[GameInfo],`
			`) -> dict[int, int]:`
			`"""Lazily fetch poll counts for already-finished games missing them.`

			`Reads the polls cache, identifies finished games whose poll count is`
			still ``0`` (typically because the cache predates the polls schema),
			`and triggers a one-shot HLTB search to backfill them. Returns the`
			`refreshed polls cache.`
			`"""`
			`polls_cache = load_hltb_polls_cache()`
			`name_by_id = {g.app_id: g.name for g in games}`
			`missing = [`
			`(aid, name_by_id[aid])`
			`for aid in state.finished_app_ids`
			`if aid in name_by_id and polls_cache.get(aid, 0) == 0`
			`]`
			`if not missing:`
			`return polls_cache`

			`logger.info(`
			`"Backfilling HLTB poll counts for %d already-finished games...",`
			`len(missing),`
			`)`
			`# Force a fresh search by removing the hours entries we want to refetch.`
			`# (fetch_hltb_times_cached skips entries already in the hours cache.)`
			`cache = load_hltb_cache()`
			`preserved_hours = {aid: cache[aid] for aid, _ in missing if aid in cache}`
			`for aid, _name in missing:`
			`cache.pop(aid, None)`
			`save_hltb_cache(cache, polls_cache)`

			`fetch_hltb_confidence_cached(missing)`

			`# Restore any previously-known hours that the refetch may have replaced`
			`# with a worse match (we trust prior leisure+dlc estimates).`
			`refreshed_hours = load_hltb_cache()`
			`refreshed_polls = load_hltb_polls_cache()`
			`for aid, prior_hours in preserved_hours.items():`
			`if prior_hours > 0 and refreshed_hours.get(aid, -1) <= 0:`
			`refreshed_hours[aid] = prior_hours`
			`save_hltb_cache(refreshed_hours, refreshed_polls)`
			`return refreshed_polls`


			`def _report_poll_confidence(`
			`chosen: GameInfo,`
			`games: list[GameInfo],`
			`state: State,`
			`) -> None:`
			`"""Print HLTB poll-count confidence info for the just-assigned game.`

			Shows the chosen game's ``comp_100_count`` (number of polled
			`completionist times on HowLongToBeat) and the historical minimum`
			`among the user's previously-finished games. Marks a new historical`
			`low so the user can be skeptical of unreliable estimates.`
			`"""`
			`polls_cache = _backfill_polls_for_finished(state, games)`
			`chosen_polls = polls_cache.get(chosen.app_id, chosen.comp_100_count)`
			`chosen.comp_100_count = chosen_polls`

			`finished_polls = [`
			`(polls_cache[aid], aid)`
			`for aid in state.finished_app_ids`
			`if polls_cache.get(aid, 0) > 0`
			`]`
			`if not finished_polls:`
			`_echo(f" HLTB confidence: {chosen_polls} polled completionist times")`
			`return`

			`min_polls, min_aid = min(finished_polls)`
			`name_by_id = {g.app_id: g.name for g in games}`
			`min_name = name_by_id.get(min_aid, f"AppID={min_aid}")`

			`warning = ""`
			`if 0 < chosen_polls < min_polls:`
			`warning = " ⚠ NEW LOW — estimate may be unreliable"`
			`elif chosen_polls == 0:`
			`warning = " ⚠ no polls recorded — estimate may be unreliable"`

			`_echo(f" HLTB confidence: {chosen_polls} polled completionist times{warning}")`
			`_echo(f" Historical min among finished: {min_polls} ({min_name})")`