diet-guard/diet_guard/_sync_github.py
Krzysztof kuhy Rudnicki e5b80fd610 Add cross-device log sync (Python half of Milestone 3)
Pulls every other device's pushed log from GitHub-backed dumb storage,
merges it with the local log, and pushes this device's own merged copy
back -- the PC half of the diet-guard-app sync plan.

- _sync_merge.py: pure union-by-id merge, tombstone always wins, legacy
  (time, desc) dedup for pre-id entries. Commutative and idempotent.
- _sync_github.py: minimal GitHub Contents API client (list/get/put),
  distinguishing a 404 on an unused path from the repo itself being
  unreachable.
- _sync.py: orchestration -- pull, merge, re-sign every persisted entry
  regardless of origin, write, rebuild the food bank, push. Re-signing
  unconditionally is load-bearing: an unsigned phone-origin entry would
  otherwise be silently dropped on the very next read once a machine
  holds the shared HMAC key.
- _foodbank.rebuild_food_bank(): the "replay a full log into a fresh
  bank" entrypoint the Python side was missing (the Dart port already
  had its equivalent). Backs sync's bank-rebuild step.
- New diet-guard-sync.service/.timer (15-minute cadence, headless, a
  separate unit from the gate so a held lock can't stall sync) and a
  new install.sh step to install them.
- Created the private kuhyx/diet-guard-sync GitHub repo for storage.

Incidental to this feature: adding the `sync` subcommand pushed _cli.py
past the repo's 500-line cap, so `gate`'s CLI glue moved out alongside
sync's into _cli_gate.py/_cli_sync.py -- same split pattern already used
for the gate window logic itself, not a sync-specific design choice.

338 tests, 100% branch coverage. Verified importing and running cleanly
under /usr/bin/python (the production interpreter), not just the dev
venv -- the gap that caused the earlier 3-day outage.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01FU3f5KQ1GHXsbbSecfVEyF
2026-06-22 19:36:27 +02:00

191 lines
6.7 KiB
Python

"""Minimal GitHub Contents API client for diet_guard's dumb-storage sync.
GitHub is used purely as file storage via the REST Contents API, not a git
clone -- ported in spirit from ``~/todo``'s sync transport. There is no
working tree and no git-level merge; the only merge is the domain-level one
in :mod:`diet_guard._sync_merge`.
"""
from __future__ import annotations

import base64
import logging
from urllib.parse import quote

import requests

from diet_guard._constants import SYNC_TIMEOUT_SECONDS
# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)
# Root of the GitHub REST API; the request URLs built below start with it.
_API_BASE = "https://api.github.com"
# Status code the client compares against to recognise a missing path; named
# so the comparisons below do not carry a bare magic number.
_HTTP_NOT_FOUND = 404
class GitHubSyncError(Exception):
    """Raised for a GitHub API failure the caller must not silently ignore.

    Base class for every error this module raises on purpose, so a caller
    can catch this one type to handle any sync transport failure.
    """
class RepoNotFoundError(GitHubSyncError):
    """Raised when the configured repo itself is unreachable.

    Distinguished from a path-404 (nothing pushed to that path yet, which is
    benign -- it just means no other device has synced before) so the caller
    can tell "the repo name is wrong or the PAT isn't scoped to it" apart
    from "no other device has synced yet".
    """
class GitHubSyncClient:
    """Thin wrapper around the subset of the Contents API sync needs.

    Every request is sent with the client's auth headers and
    ``SYNC_TIMEOUT_SECONDS`` as the timeout. Failures surface as
    :class:`GitHubSyncError` (or its subclass :class:`RepoNotFoundError`);
    a 404 on a path inside a reachable repo is reported as "nothing there
    yet" (``None`` / ``[]``) rather than as an error.
    """

    def __init__(self, owner: str, repo: str, token: str) -> None:
        """Create a client scoped to one repo, authenticated with ``token``.

        Args:
            owner: The repo owner/org (e.g. ``"kuhyx"``).
            repo: The repo name (e.g. ``"diet-guard-sync"``).
            token: A GitHub PAT with contents read/write on that repo.
        """
        self._owner = owner
        self._repo = repo
        self._headers = {
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
        }

    def _contents_url(self, path: str) -> str:
        """Return the Contents API URL for the repo-relative ``path``.

        The path is percent-encoded, keeping ``/`` as the separator, so a
        name containing ``#``, ``?``, ``%`` or spaces still addresses the
        intended file instead of being read as a fragment or query string.
        """
        repo_url = f"{_API_BASE}/repos/{self._owner}/{self._repo}"
        return f"{repo_url}/contents/{quote(path, safe='/')}"

    def _get(self, path: str) -> requests.Response:
        """GET the contents endpoint for ``path`` and return the raw response.

        No status check happens here; callers inspect the response.

        Raises:
            GitHubSyncError: If the request itself fails (network error).
        """
        try:
            return requests.get(
                self._contents_url(path),
                headers=self._headers,
                timeout=SYNC_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            msg = f"network error reading {path}"
            raise GitHubSyncError(msg) from exc

    def _repo_exists(self) -> bool:
        """Return True if a GET on the repo itself answers with a 2xx.

        A request that cannot complete counts as "not reachable" and yields
        False rather than raising.
        """
        try:
            response = requests.get(
                f"{_API_BASE}/repos/{self._owner}/{self._repo}",
                headers=self._headers,
                timeout=SYNC_TIMEOUT_SECONDS,
            )
        except requests.RequestException:
            # The bool return drops the cause, so keep it in the debug log.
            _logger.debug(
                "probe of %s/%s failed",
                self._owner,
                self._repo,
                exc_info=True,
            )
            return False
        return response.ok

    def _raise_for_missing_path(self, path: str) -> None:
        """Raise :class:`RepoNotFoundError` only if the repo is unreachable.

        A 404 on a path within a reachable repo just means nothing has been
        pushed there yet, which is not an error worth raising on.
        """
        if not self._repo_exists():
            msg = (
                f"{self._owner}/{self._repo} not found, private without "
                f"access, or the token lacks contents permission "
                f"(while reading {path})"
            )
            raise RepoNotFoundError(msg)

    def _get_existing(
        self, path: str, label: str = ""
    ) -> requests.Response | None:
        """Return the 2xx response for ``path``, or None on a benign 404.

        Shared status handling for every read in this client.

        Args:
            path: A repo-relative path.
            label: Text inserted after the path in the failure message to
                say which operation was reading (e.g. ``" (list)"``).

        Raises:
            RepoNotFoundError: On a 404 when the repo itself is unreachable.
            GitHubSyncError: On any other non-2xx response or network error.
        """
        response = self._get(path)
        if response.status_code == _HTTP_NOT_FOUND:
            self._raise_for_missing_path(path)
            return None
        if not response.ok:
            msg = f"GET {path}{label} failed: {response.status_code}"
            raise GitHubSyncError(msg)
        return response

    @staticmethod
    def _json_payload(response: requests.Response, path: str) -> object:
        """Return the decoded JSON body, wrapping a malformed one.

        Raises:
            GitHubSyncError: If the body is not valid JSON.
        """
        try:
            return response.json()
        except ValueError as exc:
            # Every JSON decode error requests can raise is a ValueError.
            msg = f"GET {path} returned a body that is not valid JSON"
            raise GitHubSyncError(msg) from exc

    def get_file_text(self, path: str) -> str | None:
        """Return the decoded text content at ``path``, or None if unused.

        Args:
            path: A repo-relative file path, e.g.
                ``"devices/pc/food_log.json"``.

        Returns:
            The file's text content, or None if nothing has been pushed
            there yet (but the repo itself is reachable).

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: For any other non-2xx response, a network
                error, or a 2xx response whose body is not a base64-encoded
                UTF-8 file payload.
        """
        response = self._get_existing(path)
        if response is None:
            return None
        data = self._json_payload(response, path)
        if not isinstance(data, dict):
            # A non-object payload is not a file (a directory is answered
            # with a list); reading it as "" would look like an empty file.
            msg = f"GET {path} returned a non-file payload"
            raise GitHubSyncError(msg)
        # A payload without these keys is read leniently as an empty
        # base64 file; a present-but-different encoding is rejected.
        encoding = data.get("encoding", "base64")
        content = data.get("content", "")
        if encoding != "base64" or not isinstance(content, str):
            # NOTE(review): GitHub's docs describe an empty content with
            # encoding "none" for files too large for this media type --
            # confirm; either way the text is not available here.
            msg = f"GET {path} returned unsupported encoding {encoding!r}"
            raise GitHubSyncError(msg)
        try:
            return base64.b64decode(content).decode("utf-8")
        except ValueError as exc:
            # Covers both a bad base64 body and bytes that are not UTF-8.
            msg = f"GET {path} returned content that could not be decoded"
            raise GitHubSyncError(msg) from exc

    def _existing_sha(self, path: str) -> str | None:
        """Return the blob sha currently stored at ``path``, if any.

        Returns:
            The ``sha`` string from the response, or None when the path is
            unused or the payload carries no string ``sha``.

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: For any other non-2xx response, a network
                error, or a body that is not valid JSON.
        """
        response = self._get_existing(path, " (for sha)")
        if response is None:
            return None
        data = self._json_payload(response, path)
        sha = data.get("sha") if isinstance(data, dict) else None
        return sha if isinstance(sha, str) else None

    def put_file_text(self, path: str, text: str, *, message: str) -> None:
        """Create or update the file at ``path`` with ``text``.

        Args:
            path: A repo-relative file path.
            text: The full new content (this device's complete merged log).
            message: The commit message for this push.

        Raises:
            RepoNotFoundError: If looking up the current sha finds the repo
                itself unreachable.
            GitHubSyncError: On any non-2xx response or network error.
        """
        sha = self._existing_sha(path)
        payload: dict[str, object] = {
            "message": message,
            "content": base64.b64encode(text.encode("utf-8")).decode("ascii"),
        }
        # The sha is sent only when a file already exists at the path, which
        # turns the request into an update of that exact version.
        if sha is not None:
            payload["sha"] = sha
        try:
            response = requests.put(
                self._contents_url(path),
                headers=self._headers,
                json=payload,
                timeout=SYNC_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            msg = f"network error pushing {path}"
            raise GitHubSyncError(msg) from exc
        if not response.ok:
            msg = f"PUT {path} failed: {response.status_code}"
            raise GitHubSyncError(msg)

    def list_directory(self, path: str) -> list[str]:
        """Return the entry names directly under ``path`` (empty if unused).

        Args:
            path: A repo-relative directory path, e.g. ``"devices"``.

        Returns:
            The ``name`` of every entry in the listing; an empty list when
            the path is unused or the payload is not a list.

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: For any other non-2xx response, a network
                error, or a body that is not valid JSON.
        """
        response = self._get_existing(path, " (list)")
        if response is None:
            return []
        data = self._json_payload(response, path)
        if not isinstance(data, list):
            return []
        return [
            item["name"]
            for item in data
            if isinstance(item, dict) and isinstance(item.get("name"), str)
        ]