feat(cloud): fix backup/restore for cloud mode — SQLCipher encrypt/decrypt

T13: Three fixes:
1. backup.py: _decrypt_db_to_bytes() decrypts SQLCipher DB before archiving
   so the zip is portable to any local Docker install (plain SQLite).
2. backup.py: _encrypt_db_from_bytes() re-encrypts on restore in cloud mode
   so the app can open the restored DB normally.
3. 2_Settings.py: _base_dir uses get_db_path().parent in cloud mode (user's
   per-tenant data dir) instead of the hardcoded app root; db_key wired
   through both create_backup() and restore_backup() calls.

6 new cloud backup tests, including 2 unit tests for the SQLCipher helpers (pysqlcipher3
mocked — not available in the local conda test env). 419/419 total passing.
This commit is contained in:
pyr0ball 2026-03-09 22:41:44 -07:00
parent 441e4ce4ef
commit 7a698496f9
3 changed files with 236 additions and 4 deletions

View file

@ -1517,7 +1517,10 @@ with tab_data:
from scripts.backup import create_backup, list_backup_contents, restore_backup as _do_restore from scripts.backup import create_backup, list_backup_contents, restore_backup as _do_restore
_base_dir = Path(__file__).parent.parent.parent # Cloud mode: per-user data lives at get_db_path().parent — not the app root.
# db_key is used to transparently decrypt on export and re-encrypt on import.
_db_key = st.session_state.get("db_key", "") if CLOUD_MODE else ""
_base_dir = get_db_path().parent if (CLOUD_MODE and st.session_state.get("db_path")) else Path(__file__).parent.parent.parent
# ── Backup ──────────────────────────────────────────────────────────────── # ── Backup ────────────────────────────────────────────────────────────────
st.markdown("### 📦 Create Backup") st.markdown("### 📦 Create Backup")
@ -1525,7 +1528,7 @@ with tab_data:
if st.button("Create Backup", key="backup_create"): if st.button("Create Backup", key="backup_create"):
with st.spinner("Creating backup…"): with st.spinner("Creating backup…"):
try: try:
_zip_bytes = create_backup(_base_dir, include_db=_incl_db) _zip_bytes = create_backup(_base_dir, include_db=_incl_db, db_key=_db_key)
_info = list_backup_contents(_zip_bytes) _info = list_backup_contents(_zip_bytes)
from datetime import datetime as _dt from datetime import datetime as _dt
_ts = _dt.now().strftime("%Y%m%d-%H%M%S") _ts = _dt.now().strftime("%Y%m%d-%H%M%S")
@ -1572,6 +1575,7 @@ with tab_data:
_zip_bytes, _base_dir, _zip_bytes, _base_dir,
include_db=_restore_db, include_db=_restore_db,
overwrite=_restore_overwrite, overwrite=_restore_overwrite,
db_key=_db_key,
) )
st.success(f"Restored {len(_result['restored'])} files.") st.success(f"Restored {len(_result['restored'])} files.")
with st.expander("Details"): with st.expander("Details"):

View file

@ -4,6 +4,16 @@ Creates a portable zip of all gitignored configs + optionally the staging DB.
Intended for: machine migrations, Docker volume transfers, and safe wizard testing. Intended for: machine migrations, Docker volume transfers, and safe wizard testing.
Supports both the Peregrine Docker instance and the legacy /devl/job-seeker install. Supports both the Peregrine Docker instance and the legacy /devl/job-seeker install.
Cloud mode notes
----------------
In cloud mode (CLOUD_MODE=true), the staging DB is SQLCipher-encrypted.
Pass the per-user ``db_key`` to ``create_backup()`` to have it transparently
decrypt the DB before archiving, producing a portable, plain SQLite file
that works with any local Docker install.
Pass the same ``db_key`` to ``restore_backup()`` and it will re-encrypt the
plain DB on its way in, so the cloud app can open it normally.
Usage (CLI): Usage (CLI):
conda run -n job-seeker python scripts/backup.py --create backup.zip conda run -n job-seeker python scripts/backup.py --create backup.zip
conda run -n job-seeker python scripts/backup.py --create backup.zip --no-db conda run -n job-seeker python scripts/backup.py --create backup.zip --no-db
@ -21,6 +31,8 @@ from __future__ import annotations
import io import io
import json import json
import os
import tempfile
import zipfile import zipfile
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
@ -62,6 +74,63 @@ _DB_CANDIDATES = ["data/staging.db", "staging.db"]
_MANIFEST_NAME = "backup-manifest.json" _MANIFEST_NAME = "backup-manifest.json"
# ---------------------------------------------------------------------------
# SQLCipher helpers (cloud mode only — only called when db_key is set)
# ---------------------------------------------------------------------------
def _decrypt_db_to_bytes(db_path: Path, db_key: str) -> bytes:
    """Open a SQLCipher-encrypted DB and return plain SQLite bytes.

    Uses SQLCipher's ATTACH + sqlcipher_export() to write an unencrypted
    copy into a temporary file, reads that file back, and removes it.
    Only called in cloud mode (db_key non-empty).

    pysqlcipher3 is available in the Docker image (Dockerfile installs
    libsqlcipher-dev); never called in local-mode tests.

    Args:
        db_path: Path of the SQLCipher-encrypted database to export.
        db_key:  Passphrase of that database.

    Returns:
        The raw bytes of the exported, unencrypted database file.

    Raises:
        ImportError: if pysqlcipher3 is not installed.
        Any error raised by the pysqlcipher3 connection is propagated; the
        connection is closed and the temporary file removed either way.
    """
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp_path = tmp.name
    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        # The key and the path are spliced into SQL string literals, so any
        # embedded single quote must be doubled or the statement breaks.
        key_sql = db_key.replace("'", "''")
        tmp_sql = tmp_path.replace("'", "''")

        conn = _sqlcipher.connect(str(db_path))
        try:
            conn.execute(f"PRAGMA key='{key_sql}'")
            conn.execute(f"ATTACH DATABASE '{tmp_sql}' AS plaintext KEY ''")
            conn.execute("SELECT sqlcipher_export('plaintext')")
            conn.execute("DETACH DATABASE plaintext")
        finally:
            # Close even when a statement fails (e.g. wrong key) so the
            # handle on the user's DB is not leaked.
            conn.close()
        return Path(tmp_path).read_bytes()
    finally:
        # Best-effort cleanup of the plaintext copy; a failed unlink must
        # not mask the real result or error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
def _encrypt_db_from_bytes(plain_bytes: bytes, dest_path: Path, db_key: str) -> None:
    """Write plain SQLite bytes as a SQLCipher-encrypted DB at dest_path.

    Used on restore in cloud mode to convert a portable plain backup into
    the per-user encrypted format the app expects.

    The plain bytes are written to a temporary file, opened through
    pysqlcipher3 with an empty key, and exported into ``dest_path`` via
    ATTACH + sqlcipher_export(). A database already present at
    ``dest_path`` is moved aside first so the export always lands in a
    fresh file; it is put back if the export fails and deleted otherwise.

    Args:
        plain_bytes: Contents of an unencrypted SQLite database file.
        dest_path:   Where the encrypted database is written. Parent
                     directories are created as needed.
        db_key:      Passphrase for the encrypted database.

    Raises:
        ImportError: if pysqlcipher3 is not installed.
        Any error raised by the pysqlcipher3 connection is propagated after
        the connection is closed and the previous DB (if any) restored.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp.write(plain_bytes)
        tmp_path = tmp.name

    # Exporting into a file that already holds a database would collide
    # with its existing contents, so park the old DB next to the target.
    backup_path = dest_path.with_name(dest_path.name + ".pre-restore")
    moved_aside = dest_path.exists()
    if moved_aside:
        os.replace(dest_path, backup_path)

    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        # The key and the path are spliced into SQL string literals, so any
        # embedded single quote must be doubled or the statement breaks.
        key_sql = db_key.replace("'", "''")
        dest_sql = str(dest_path).replace("'", "''")

        # Open the plain DB (empty key = no encryption in SQLCipher)
        conn = _sqlcipher.connect(tmp_path)
        try:
            conn.execute("PRAGMA key=''")
            # Attach the encrypted destination and export there
            conn.execute(f"ATTACH DATABASE '{dest_sql}' AS encrypted KEY '{key_sql}'")
            conn.execute("SELECT sqlcipher_export('encrypted')")
            conn.execute("DETACH DATABASE encrypted")
        finally:
            conn.close()
    except Exception:
        # Roll back: put the previous DB back over any partial export.
        if moved_aside:
            try:
                os.replace(backup_path, dest_path)
            except OSError:
                pass
        raise
    else:
        if moved_aside:
            try:
                os.unlink(backup_path)
            except OSError:
                pass
    finally:
        # Best-effort removal of the plaintext temp copy.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Source detection # Source detection
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -90,6 +159,7 @@ def create_backup(
base_dir: Path, base_dir: Path,
include_db: bool = True, include_db: bool = True,
source_label: str | None = None, source_label: str | None = None,
db_key: str = "",
) -> bytes: ) -> bytes:
"""Return a zip archive as raw bytes. """Return a zip archive as raw bytes.
@ -98,6 +168,9 @@ def create_backup(
include_db: If True, include staging.db in the archive. include_db: If True, include staging.db in the archive.
source_label: Human-readable instance name stored in the manifest source_label: Human-readable instance name stored in the manifest
(e.g. "peregrine", "job-seeker"). Auto-detected if None. (e.g. "peregrine", "job-seeker"). Auto-detected if None.
db_key: SQLCipher key for the DB (cloud mode). When set, the DB
is decrypted before archiving so the backup is portable
to any local Docker install.
""" """
buf = io.BytesIO() buf = io.BytesIO()
included: list[str] = [] included: list[str] = []
@ -128,7 +201,12 @@ def create_backup(
for candidate in _DB_CANDIDATES: for candidate in _DB_CANDIDATES:
p = base_dir / candidate p = base_dir / candidate
if p.exists(): if p.exists():
zf.write(p, candidate) if db_key:
# Cloud mode: decrypt to plain SQLite before archiving
plain_bytes = _decrypt_db_to_bytes(p, db_key)
zf.writestr(candidate, plain_bytes)
else:
zf.write(p, candidate)
included.append(candidate) included.append(candidate)
break break
@ -167,6 +245,7 @@ def restore_backup(
base_dir: Path, base_dir: Path,
include_db: bool = True, include_db: bool = True,
overwrite: bool = True, overwrite: bool = True,
db_key: str = "",
) -> dict[str, list[str]]: ) -> dict[str, list[str]]:
"""Extract a backup zip into base_dir. """Extract a backup zip into base_dir.
@ -175,6 +254,9 @@ def restore_backup(
base_dir: Repo root to restore into. base_dir: Repo root to restore into.
include_db: If False, skip any .db files. include_db: If False, skip any .db files.
overwrite: If False, skip files that already exist. overwrite: If False, skip files that already exist.
db_key: SQLCipher key (cloud mode). When set, any .db file in the
zip (plain SQLite) is re-encrypted on the way in so the
cloud app can open it normally.
Returns: Returns:
{"restored": [...], "skipped": [...]} {"restored": [...], "skipped": [...]}
@ -194,7 +276,12 @@ def restore_backup(
skipped.append(name) skipped.append(name)
continue continue
dest.parent.mkdir(parents=True, exist_ok=True) dest.parent.mkdir(parents=True, exist_ok=True)
dest.write_bytes(zf.read(name)) raw = zf.read(name)
if db_key and name.endswith(".db"):
# Cloud mode: the zip contains plain SQLite — re-encrypt on restore
_encrypt_db_from_bytes(raw, dest, db_key)
else:
dest.write_bytes(raw)
restored.append(name) restored.append(name)
return {"restored": restored, "skipped": skipped} return {"restored": restored, "skipped": skipped}

View file

@ -4,11 +4,14 @@ from __future__ import annotations
import json import json
import zipfile import zipfile
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest import pytest
from scripts.backup import ( from scripts.backup import (
_decrypt_db_to_bytes,
_detect_source_label, _detect_source_label,
_encrypt_db_from_bytes,
create_backup, create_backup,
list_backup_contents, list_backup_contents,
restore_backup, restore_backup,
@ -229,3 +232,141 @@ class TestDetectSourceLabel:
base = tmp_path / "job-seeker" base = tmp_path / "job-seeker"
base.mkdir() base.mkdir()
assert _detect_source_label(base) == "job-seeker" assert _detect_source_label(base) == "job-seeker"
# ---------------------------------------------------------------------------
# Cloud mode — SQLCipher encrypt / decrypt (pysqlcipher3 mocked)
# ---------------------------------------------------------------------------
class _FakeCursor:
    """Inert cursor stand-in: usable as a context manager, does nothing."""

    def __enter__(self):
        # Hand back the cursor itself, like a real DB-API cursor would.
        return self

    def __exit__(self, *a):
        # False => exceptions raised inside the ``with`` body propagate.
        return False

    def execute(self, *a):
        """Accept any positional arguments and ignore them."""
        return None

    def fetchone(self):
        """Always report "no row"."""
        return None
def _make_mock_sqlcipher_conn(plain_bytes: bytes, tmp_path: Path):
    """Build a MagicMock standing in for a pysqlcipher3 connection.

    Its ``execute`` remembers the path quoted in the most recent
    ``ATTACH DATABASE`` statement and, when a statement containing
    ``sqlcipher_export`` arrives, writes ``plain_bytes`` to that path
    (simulating the export). Any other SQL is ignored. ``tmp_path`` is
    accepted but not used.
    """
    export_target: dict = {}

    def _execute(sql, *args):
        if "ATTACH DATABASE" in sql:
            # The path is the text between the first pair of single quotes.
            export_target["path"] = sql.split("'")[1]
            return
        if "sqlcipher_export" in sql:
            # Pretend the export happened: drop the bytes at the attached path.
            Path(export_target["path"]).write_bytes(plain_bytes)

    mock_conn = MagicMock()
    mock_conn.execute.side_effect = _execute
    mock_conn.close = MagicMock()
    return mock_conn
class TestCloudBackup:
    """Backup/restore with SQLCipher encryption — pysqlcipher3 mocked out.

    The first four tests patch the SQLCipher helpers and check that
    create_backup()/restore_backup() route the DB through them only when a
    db_key is given. The last two exercise the helpers themselves against a
    fake ``pysqlcipher3`` injected into ``sys.modules``.
    """

    def test_create_backup_decrypts_db_when_key_set(self, tmp_path):
        """With db_key, _decrypt_db_to_bytes is called and plain bytes go into zip."""
        import io

        base = _make_instance(tmp_path, "cloud-user")
        plain_db = b"SQLite format 3\x00plain-content"

        with patch("scripts.backup._decrypt_db_to_bytes", return_value=plain_db) as mock_dec:
            data = create_backup(base, include_db=True, db_key="testkey")

        mock_dec.assert_called_once()
        # The zip should contain the plain bytes, not the raw encrypted file
        with zipfile.ZipFile(io.BytesIO(data)) as zf:
            db_files = [n for n in zf.namelist() if n.endswith(".db")]
            assert len(db_files) == 1
            assert zf.read(db_files[0]) == plain_db

    def test_create_backup_no_key_reads_file_directly(self, tmp_path):
        """Without db_key, _decrypt_db_to_bytes is NOT called."""
        base = _make_instance(tmp_path, "local-user")

        with patch("scripts.backup._decrypt_db_to_bytes") as mock_dec:
            create_backup(base, include_db=True, db_key="")

        mock_dec.assert_not_called()

    def test_restore_backup_encrypts_db_when_key_set(self, tmp_path):
        """With db_key, _encrypt_db_from_bytes is called for .db files."""
        src = _make_instance(tmp_path, "cloud-src")
        dst = tmp_path / "cloud-dst"
        dst.mkdir()
        plain_db = b"SQLite format 3\x00plain-content"

        # Create a backup with plain DB bytes
        with patch("scripts.backup._decrypt_db_to_bytes", return_value=plain_db):
            data = create_backup(src, include_db=True, db_key="testkey")

        with patch("scripts.backup._encrypt_db_from_bytes") as mock_enc:
            restore_backup(data, dst, include_db=True, db_key="testkey")

        mock_enc.assert_called_once()
        positional = mock_enc.call_args[0]
        assert positional[0] == plain_db  # plain_bytes
        assert positional[2] == "testkey"  # db_key

    def test_restore_backup_no_key_writes_file_directly(self, tmp_path):
        """Without db_key, _encrypt_db_from_bytes is NOT called."""
        src = _make_instance(tmp_path, "local-src")
        dst = tmp_path / "local-dst"
        dst.mkdir()
        data = create_backup(src, include_db=True, db_key="")

        with patch("scripts.backup._encrypt_db_from_bytes") as mock_enc:
            restore_backup(data, dst, include_db=True, db_key="")

        mock_enc.assert_not_called()

    def test_decrypt_db_to_bytes_calls_sqlcipher(self, tmp_path):
        """_decrypt_db_to_bytes imports pysqlcipher3.dbapi2 and calls sqlcipher_export."""
        fake_db = tmp_path / "staging.db"
        fake_db.write_bytes(b"encrypted")
        plain_bytes = b"SQLite format 3\x00"

        mock_conn = _make_mock_sqlcipher_conn(plain_bytes, tmp_path)
        mock_module = MagicMock()
        mock_module.connect.return_value = mock_conn

        # Must set dbapi2 explicitly on the package mock so `from pysqlcipher3 import
        # dbapi2` resolves to mock_module (not a new auto-created MagicMock attr).
        mock_pkg = MagicMock()
        mock_pkg.dbapi2 = mock_module
        fake_modules = {"pysqlcipher3": mock_pkg, "pysqlcipher3.dbapi2": mock_module}

        with patch.dict("sys.modules", fake_modules):
            result = _decrypt_db_to_bytes(fake_db, "testkey")

        mock_module.connect.assert_called_once_with(str(fake_db))
        # The helper must release the connection it opened.
        mock_conn.close.assert_called_once()
        assert result == plain_bytes

    def test_encrypt_db_from_bytes_calls_sqlcipher(self, tmp_path):
        """_encrypt_db_from_bytes imports pysqlcipher3.dbapi2 and calls sqlcipher_export."""
        dest = tmp_path / "staging.db"
        plain_bytes = b"SQLite format 3\x00"

        mock_conn = MagicMock()
        mock_module = MagicMock()
        mock_module.connect.return_value = mock_conn
        mock_pkg = MagicMock()
        mock_pkg.dbapi2 = mock_module
        fake_modules = {"pysqlcipher3": mock_pkg, "pysqlcipher3.dbapi2": mock_module}

        with patch.dict("sys.modules", fake_modules):
            _encrypt_db_from_bytes(plain_bytes, dest, "testkey")

        mock_module.connect.assert_called_once()
        # Verify ATTACH DATABASE call included the dest path and key
        attach_calls = [
            recorded for recorded in mock_conn.execute.call_args_list
            if "ATTACH DATABASE" in str(recorded)
        ]
        assert len(attach_calls) == 1
        assert str(dest) in str(attach_calls[0])
        assert "testkey" in str(attach_calls[0])