mirror of
https://github.com/kuhyx/diet-guard.git
synced 2026-07-04 13:03:10 +02:00
Pulls every other device's pushed log from GitHub-backed dumb storage, merges it with the local log, and pushes this device's own merged copy back -- the PC half of the diet-guard-app sync plan. - _sync_merge.py: pure union-by-id merge, tombstone always wins, legacy (time, desc) dedup for pre-id entries. Commutative and idempotent. - _sync_github.py: minimal GitHub Contents API client (list/get/put), distinguishing a 404 on an unused path from the repo itself being unreachable. - _sync.py: orchestration -- pull, merge, re-sign every persisted entry regardless of origin, write, rebuild the food bank, push. Re-signing unconditionally is load-bearing: an unsigned phone-origin entry would otherwise be silently dropped on the very next read once a machine holds the shared HMAC key. - _foodbank.rebuild_food_bank(): the "replay a full log into a fresh bank" entrypoint the Python side was missing (the Dart port already had its equivalent). Backs sync's bank-rebuild step. - New diet-guard-sync.service/.timer (15-minute cadence, headless, a separate unit from the gate so a held lock can't stall sync) and a new install.sh step to install them. - Created the private kuhyx/diet-guard-sync GitHub repo for storage. Incidental to this feature: adding the `sync` subcommand pushed _cli.py past the repo's 500-line cap, so `gate`'s CLI glue moved out alongside sync's into _cli_gate.py/_cli_sync.py -- same split pattern already used for the gate window logic itself, not a sync-specific design choice. 338 tests, 100% branch coverage. Verified importing and running cleanly under /usr/bin/python (the production interpreter), not just the dev venv -- the gap that caused the earlier 3-day outage. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01FU3f5KQ1GHXsbbSecfVEyF
191 lines
6.7 KiB
Python
191 lines
6.7 KiB
Python
"""Minimal GitHub Contents API client for diet_guard's dumb-storage sync.
|
|
|
|
GitHub is used purely as file storage via the REST Contents API, not a git
|
|
clone -- ported in spirit from ``~/todo``'s sync transport. There is no
|
|
working tree and no git-level merge; the only merge is the domain-level one
|
|
in :mod:`diet_guard._sync_merge`.
|
|
"""
|
|
|
|
from __future__ import annotations

import base64
import logging
from urllib.parse import quote

import requests

from diet_guard._constants import SYNC_TIMEOUT_SECONDS

# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)

# Root of the GitHub REST API; every endpoint URL in this module is built on it.
_API_BASE = "https://api.github.com"
# Status code that is ambiguous on its own: the client sees it both for a path
# nothing has been pushed to yet and for a repo it cannot reach at all.
_HTTP_NOT_FOUND = 404


class GitHubSyncError(Exception):
    """Raised for a GitHub API failure the caller must not silently ignore.

    Also the base class of :class:`RepoNotFoundError`, so catching this type
    covers the repo-unreachable case as well.
    """


class RepoNotFoundError(GitHubSyncError):
    """Raised when the configured repo itself is unreachable.

    Distinguished from a path-404 (nothing pushed to that path yet, which is
    benign -- it just means no other device has synced before) so the caller
    can tell "the repo name is wrong or the PAT isn't scoped to it" apart
    from "no other device has synced yet".
    """


class GitHubSyncClient:
    """Thin wrapper around the subset of the Contents API sync needs.

    Every call is a one-shot ``requests`` round trip bounded by
    ``SYNC_TIMEOUT_SECONDS``; there is no session reuse and no retry.
    All failures surface as :class:`GitHubSyncError` (or its subclass
    :class:`RepoNotFoundError`).
    """

    def __init__(self, owner: str, repo: str, token: str) -> None:
        """Create a client scoped to one repo, authenticated with ``token``.

        Args:
            owner: The repo owner/org (e.g. ``"kuhyx"``).
            repo: The repo name (e.g. ``"diet-guard-sync"``).
            token: A GitHub PAT with contents read/write on that repo.
        """
        self._owner = owner
        self._repo = repo
        self._headers = {
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
        }

    @staticmethod
    def _quote_path(path: str) -> str:
        """Return ``path`` percent-encoded for use inside a URL.

        ``/`` is kept as the segment separator; any other character that is
        not URL-safe (space, ``#``, ``?``, ``%`` ...) is escaped so a device
        or file name containing it cannot be read as a query string or
        fragment. Leading/trailing slashes are dropped so the joined URL
        never contains an empty path segment.
        """
        return quote(path.strip("/"), safe="/")

    def _contents_url(self, path: str) -> str:
        """Return the Contents API URL for the repo-relative ``path``."""
        return (
            f"{_API_BASE}/repos/{self._owner}/{self._repo}"
            f"/contents/{self._quote_path(path)}"
        )

    def _get(self, path: str) -> requests.Response:
        """GET the Contents API entry for ``path`` without judging the status.

        Raises:
            GitHubSyncError: If the request fails at the network level.
        """
        try:
            return requests.get(
                self._contents_url(path),
                headers=self._headers,
                timeout=SYNC_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            msg = f"network error reading {path}"
            raise GitHubSyncError(msg) from exc

    def _repo_exists(self) -> bool:
        """Return True if the repo metadata endpoint answers with 2xx.

        A network failure is reported as "not reachable" (False) rather than
        raised, but it is logged so the cause is not lost.
        """
        try:
            response = requests.get(
                f"{_API_BASE}/repos/{self._owner}/{self._repo}",
                headers=self._headers,
                timeout=SYNC_TIMEOUT_SECONDS,
            )
        except requests.RequestException:
            _logger.warning(
                "could not probe %s/%s",
                self._owner,
                self._repo,
                exc_info=True,
            )
            return False
        return response.ok

    def _raise_for_missing_path(self, path: str) -> None:
        """Raise :class:`RepoNotFoundError` only if the repo is unreachable.

        A 404 on a path within a reachable repo just means nothing has been
        pushed there yet, which is not an error worth raising on.
        """
        if not self._repo_exists():
            msg = (
                f"{self._owner}/{self._repo} not found, private without "
                f"access, or the token lacks contents permission "
                f"(while reading {path})"
            )
            raise RepoNotFoundError(msg)

    def _get_or_none(
        self,
        path: str,
        *,
        what: str = "",
    ) -> requests.Response | None:
        """GET ``path`` and return the 2xx response, or None if unused.

        Shared by every read so the 404 / non-2xx handling lives in one place.

        Args:
            path: A repo-relative path.
            what: Optional suffix (e.g. ``" (list)"``) naming the operation
                in the error message.

        Raises:
            RepoNotFoundError: On a 404 when the repo itself is unreachable.
            GitHubSyncError: On any other non-2xx response or network error.
        """
        response = self._get(path)
        if response.status_code == _HTTP_NOT_FOUND:
            # Raises if the repo is the thing that's missing; otherwise the
            # path is simply unused and the caller gets None.
            self._raise_for_missing_path(path)
            return None
        if not response.ok:
            msg = f"GET {path}{what} failed: {response.status_code}"
            raise GitHubSyncError(msg)
        return response

    @staticmethod
    def _json_body(response: requests.Response, path: str) -> object:
        """Return the parsed JSON body, raising GitHubSyncError if malformed."""
        try:
            return response.json()
        except ValueError as exc:
            msg = f"GET {path} returned a non-JSON body"
            raise GitHubSyncError(msg) from exc

    @staticmethod
    def _decode_content(data: object, path: str) -> str:
        """Decode the base64 ``content`` field of a Contents API file object.

        A payload without inline base64 content is rejected instead of being
        read as an empty file: that shape means ``path`` is a directory (the
        body is a list) or the file is too large for the API to inline (the
        API reports a non-base64 ``encoding``), and returning ``""`` would
        make the caller merge against a log that is not actually empty.

        Raises:
            GitHubSyncError: If the payload carries no decodable content.
        """
        if not isinstance(data, dict):
            msg = f"GET {path} did not return a file object"
            raise GitHubSyncError(msg)
        content = data.get("content")
        # A missing "encoding" key is treated as base64, the API's file default.
        encoding = data.get("encoding", "base64")
        if not isinstance(content, str) or encoding != "base64":
            msg = (
                f"GET {path} returned no inline base64 content "
                f"(encoding={encoding!r}); it may be a non-file entry or "
                f"exceed the Contents API inline size limit"
            )
            raise GitHubSyncError(msg)
        try:
            # b64decode ignores the newlines GitHub embeds in the payload.
            return base64.b64decode(content).decode("utf-8")
        except ValueError as exc:  # binascii.Error and UnicodeDecodeError
            msg = f"GET {path} returned undecodable content"
            raise GitHubSyncError(msg) from exc

    def get_file_text(self, path: str) -> str | None:
        """Return the decoded text content at ``path``, or None if unused.

        Args:
            path: A repo-relative file path, e.g. ``"devices/pc/food_log.json"``.

        Returns:
            The file's text content, or None if nothing has been pushed
            there yet (but the repo itself is reachable).

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: For any other non-2xx response, network error,
                or a response body that carries no decodable file content.
        """
        response = self._get_or_none(path)
        if response is None:
            return None
        return self._decode_content(self._json_body(response, path), path)

    def _existing_sha(self, path: str) -> str | None:
        """Return the blob sha currently stored at ``path``, or None if unused.

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: For any other non-2xx response or network error.
        """
        response = self._get_or_none(path, what=" (for sha)")
        if response is None:
            return None
        data = self._json_body(response, path)
        sha = data.get("sha") if isinstance(data, dict) else None
        return sha if isinstance(sha, str) else None

    def put_file_text(self, path: str, text: str, *, message: str) -> None:
        """Create or update the file at ``path`` with ``text``.

        Args:
            path: A repo-relative file path.
            text: The full new content (this device's complete merged log).
            message: The commit message for this push.

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: On any non-2xx response or network error.
        """
        # The sha of the current blob is sent only when the file already
        # exists; without one the request creates the file.
        # NOTE(review): lookup and PUT are separate requests, so a push from
        # another writer in between makes the PUT fail; that surfaces below
        # as GitHubSyncError rather than being retried.
        sha = self._existing_sha(path)
        payload: dict[str, object] = {
            "message": message,
            "content": base64.b64encode(text.encode("utf-8")).decode("ascii"),
        }
        if sha is not None:
            payload["sha"] = sha
        try:
            response = requests.put(
                self._contents_url(path),
                headers=self._headers,
                json=payload,
                timeout=SYNC_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            msg = f"network error pushing {path}"
            raise GitHubSyncError(msg) from exc
        if not response.ok:
            msg = f"PUT {path} failed: {response.status_code}"
            raise GitHubSyncError(msg)

    def list_directory(self, path: str) -> list[str]:
        """Return the entry names directly under ``path`` (empty if unused).

        Args:
            path: A repo-relative directory path, e.g. ``"devices"``.

        Returns:
            The ``name`` of every well-formed entry in the listing; an empty
            list if the path is unused or the body is not a JSON list.

        Raises:
            RepoNotFoundError: If the repo itself is unreachable.
            GitHubSyncError: For any other non-2xx response or network error.
        """
        response = self._get_or_none(path, what=" (list)")
        if response is None:
            return []
        data = self._json_body(response, path)
        if not isinstance(data, list):
            return []
        return [
            item["name"]
            for item in data
            if isinstance(item, dict) and isinstance(item.get("name"), str)
        ]