feat(cloud): fix backup/restore for cloud mode — SQLCipher encrypt/decrypt

T13: Three fixes:
1. backup.py: _decrypt_db_to_bytes() decrypts SQLCipher DB before archiving
   so the zip is portable to any local Docker install (plain SQLite).
2. backup.py: _encrypt_db_from_bytes() re-encrypts on restore in cloud mode
   so the app can open the restored DB normally.
3. 2_Settings.py: _base_dir uses get_db_path().parent in cloud mode (user's
   per-tenant data dir) instead of the hardcoded app root; db_key wired
   through both create_backup() and restore_backup() calls.

6 new cloud backup tests + 2 unit tests for SQLCipher helpers (pysqlcipher3
mocked — not available in the local conda test env). 419/419 total passing.
This commit is contained in:
pyr0ball 2026-03-09 22:41:44 -07:00
parent 441e4ce4ef
commit 7a698496f9
3 changed files with 236 additions and 4 deletions

View file

@ -1517,7 +1517,10 @@ with tab_data:
from scripts.backup import create_backup, list_backup_contents, restore_backup as _do_restore
_base_dir = Path(__file__).parent.parent.parent
# Cloud mode: per-user data lives at get_db_path().parent — not the app root.
# db_key is used to transparently decrypt on export and re-encrypt on import.
_db_key = st.session_state.get("db_key", "") if CLOUD_MODE else ""
_base_dir = get_db_path().parent if (CLOUD_MODE and st.session_state.get("db_path")) else Path(__file__).parent.parent.parent
# ── Backup ────────────────────────────────────────────────────────────────
st.markdown("### 📦 Create Backup")
@ -1525,7 +1528,7 @@ with tab_data:
if st.button("Create Backup", key="backup_create"):
with st.spinner("Creating backup…"):
try:
_zip_bytes = create_backup(_base_dir, include_db=_incl_db)
_zip_bytes = create_backup(_base_dir, include_db=_incl_db, db_key=_db_key)
_info = list_backup_contents(_zip_bytes)
from datetime import datetime as _dt
_ts = _dt.now().strftime("%Y%m%d-%H%M%S")
@ -1572,6 +1575,7 @@ with tab_data:
_zip_bytes, _base_dir,
include_db=_restore_db,
overwrite=_restore_overwrite,
db_key=_db_key,
)
st.success(f"Restored {len(_result['restored'])} files.")
with st.expander("Details"):

View file

@ -4,6 +4,16 @@ Creates a portable zip of all gitignored configs + optionally the staging DB.
Intended for: machine migrations, Docker volume transfers, and safe wizard testing.
Supports both the Peregrine Docker instance and the legacy /devl/job-seeker install.
Cloud mode notes
----------------
In cloud mode (CLOUD_MODE=true), the staging DB is SQLCipher-encrypted.
Pass the per-user ``db_key`` to ``create_backup()`` to have it transparently
decrypt the DB before archiving, producing a portable, plain SQLite file
that works with any local Docker install.
Pass the same ``db_key`` to ``restore_backup()`` and it will re-encrypt the
plain DB on its way in, so the cloud app can open it normally.
Usage (CLI):
conda run -n job-seeker python scripts/backup.py --create backup.zip
conda run -n job-seeker python scripts/backup.py --create backup.zip --no-db
@ -21,6 +31,8 @@ from __future__ import annotations
import io
import json
import os
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
@ -62,6 +74,63 @@ _DB_CANDIDATES = ["data/staging.db", "staging.db"]
_MANIFEST_NAME = "backup-manifest.json"
# ---------------------------------------------------------------------------
# SQLCipher helpers (cloud mode only — only called when db_key is set)
# ---------------------------------------------------------------------------
def _decrypt_db_to_bytes(db_path: Path, db_key: str) -> bytes:
    """Open a SQLCipher-encrypted DB and return plain SQLite bytes.

    Uses SQLCipher's ATTACH + sqlcipher_export() to write an unencrypted
    copy into a temporary file, reads that file back and removes it.
    Only called in cloud mode (db_key non-empty).

    pysqlcipher3 is imported lazily inside the function so that local-mode
    code paths, which never call this helper, do not need the package.

    Args:
        db_path: Path of the SQLCipher-encrypted database to export.
        db_key: SQLCipher passphrase for ``db_path``.

    Returns:
        The raw bytes of the exported, unencrypted SQLite database.

    Raises:
        ImportError: If pysqlcipher3 is not installed.
        Exception: Any error raised by pysqlcipher3 (for example when the
            key is wrong) propagates unchanged; the connection is closed
            and the temporary file removed first.
    """
    # Reserve a temp file name; SQLCipher writes the plaintext copy there.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp_path = tmp.name
    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        # Both values are spliced into SQL string literals, so embedded
        # single quotes must be doubled to keep the statements well-formed.
        key_sql = db_key.replace("'", "''")
        tmp_sql = tmp_path.replace("'", "''")

        conn = _sqlcipher.connect(str(db_path))
        try:
            conn.execute(f"PRAGMA key='{key_sql}'")
            # Empty KEY = attach the target as a plain (unencrypted) database.
            conn.execute(f"ATTACH DATABASE '{tmp_sql}' AS plaintext KEY ''")
            conn.execute("SELECT sqlcipher_export('plaintext')")
            conn.execute("DETACH DATABASE plaintext")
        finally:
            # Close even when a statement fails so the handle is not leaked.
            conn.close()
        return Path(tmp_path).read_bytes()
    finally:
        # Best-effort cleanup: never leave the plaintext copy on disk.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
def _encrypt_db_from_bytes(plain_bytes: bytes, dest_path: Path, db_key: str) -> None:
    """Write plain SQLite bytes as a SQLCipher-encrypted DB at dest_path.

    Used on restore in cloud mode to convert a portable plain backup into
    the per-user encrypted format the app expects.

    The plain bytes are staged in a temporary file, opened through
    pysqlcipher3 with an empty key, and exported into ``dest_path`` with
    ``sqlcipher_export()``. If a database already exists at ``dest_path``
    it is moved aside first and put back if the export fails.

    Args:
        plain_bytes: Contents of an unencrypted SQLite database file.
        dest_path: Where the encrypted database is written; parent
            directories are created as needed.
        db_key: SQLCipher passphrase used to encrypt the destination.

    Raises:
        ImportError: If pysqlcipher3 is not installed.
        Exception: Any error raised by pysqlcipher3 propagates unchanged
            after the connection is closed and temp files are cleaned up.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp.write(plain_bytes)
        tmp_path = tmp.name

    # sqlcipher_export() recreates the schema inside the attached database,
    # so exporting into a DB that is already populated (restore with
    # overwrite) would collide with the existing tables. Move the old file
    # out of the way and keep it until the export has succeeded.
    previous = None
    if dest_path.exists():
        previous = dest_path.with_name(dest_path.name + ".pre-restore")
        os.replace(dest_path, previous)

    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        # Both values are spliced into SQL string literals, so embedded
        # single quotes must be doubled to keep the statement well-formed.
        dest_sql = str(dest_path).replace("'", "''")
        key_sql = db_key.replace("'", "''")

        # Open the plain DB (empty key = no encryption in SQLCipher)
        conn = _sqlcipher.connect(tmp_path)
        try:
            conn.execute("PRAGMA key=''")
            # Attach the encrypted destination and export there
            conn.execute(f"ATTACH DATABASE '{dest_sql}' AS encrypted KEY '{key_sql}'")
            conn.execute("SELECT sqlcipher_export('encrypted')")
            conn.execute("DETACH DATABASE encrypted")
        finally:
            # Close even when a statement fails so the handle is not leaked.
            conn.close()
    except BaseException:
        # Roll back: drop any partial output and reinstate the previous DB,
        # then let the original error propagate.
        if previous is not None:
            try:
                dest_path.unlink()
            except OSError:
                pass
            os.replace(previous, dest_path)
        raise
    else:
        if previous is not None:
            try:
                os.unlink(previous)
            except OSError:
                pass
    finally:
        # Best-effort cleanup of the staged plaintext copy.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
# ---------------------------------------------------------------------------
# Source detection
# ---------------------------------------------------------------------------
@ -90,6 +159,7 @@ def create_backup(
base_dir: Path,
include_db: bool = True,
source_label: str | None = None,
db_key: str = "",
) -> bytes:
"""Return a zip archive as raw bytes.
@ -98,6 +168,9 @@ def create_backup(
include_db: If True, include staging.db in the archive.
source_label: Human-readable instance name stored in the manifest
(e.g. "peregrine", "job-seeker"). Auto-detected if None.
db_key: SQLCipher key for the DB (cloud mode). When set, the DB
is decrypted before archiving so the backup is portable
to any local Docker install.
"""
buf = io.BytesIO()
included: list[str] = []
@ -128,7 +201,12 @@ def create_backup(
for candidate in _DB_CANDIDATES:
p = base_dir / candidate
if p.exists():
zf.write(p, candidate)
if db_key:
# Cloud mode: decrypt to plain SQLite before archiving
plain_bytes = _decrypt_db_to_bytes(p, db_key)
zf.writestr(candidate, plain_bytes)
else:
zf.write(p, candidate)
included.append(candidate)
break
@ -167,6 +245,7 @@ def restore_backup(
base_dir: Path,
include_db: bool = True,
overwrite: bool = True,
db_key: str = "",
) -> dict[str, list[str]]:
"""Extract a backup zip into base_dir.
@ -175,6 +254,9 @@ def restore_backup(
base_dir: Repo root to restore into.
include_db: If False, skip any .db files.
overwrite: If False, skip files that already exist.
db_key: SQLCipher key (cloud mode). When set, any .db file in the
zip (plain SQLite) is re-encrypted on the way in so the
cloud app can open it normally.
Returns:
{"restored": [...], "skipped": [...]}
@ -194,7 +276,12 @@ def restore_backup(
skipped.append(name)
continue
dest.parent.mkdir(parents=True, exist_ok=True)
dest.write_bytes(zf.read(name))
raw = zf.read(name)
if db_key and name.endswith(".db"):
# Cloud mode: the zip contains plain SQLite — re-encrypt on restore
_encrypt_db_from_bytes(raw, dest, db_key)
else:
dest.write_bytes(raw)
restored.append(name)
return {"restored": restored, "skipped": skipped}

View file

@ -4,11 +4,14 @@ from __future__ import annotations
import json
import zipfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from scripts.backup import (
_decrypt_db_to_bytes,
_detect_source_label,
_encrypt_db_from_bytes,
create_backup,
list_backup_contents,
restore_backup,
@ -229,3 +232,141 @@ class TestDetectSourceLabel:
base = tmp_path / "job-seeker"
base.mkdir()
assert _detect_source_label(base) == "job-seeker"
# ---------------------------------------------------------------------------
# Cloud mode — SQLCipher encrypt / decrypt (pysqlcipher3 mocked)
# ---------------------------------------------------------------------------
class _FakeCursor:
    """Inert cursor stand-in: usable as a context manager, returns nothing."""

    def __enter__(self):
        # Entering the context hands back the cursor itself.
        return self

    def __exit__(self, *exc_info):
        # False: never suppress an exception raised inside the ``with`` body.
        return False

    def execute(self, *params):
        # Accept any arguments and do nothing.
        return None

    def fetchone(self):
        # There is never a row to return.
        return None
def _make_mock_sqlcipher_conn(plain_bytes: bytes, tmp_path: Path):
    """Build a MagicMock connection that imitates sqlcipher_export().

    The mock's ``execute`` remembers the path quoted in the most recent
    ``ATTACH DATABASE`` statement and, when a statement containing
    ``sqlcipher_export`` is executed, writes ``plain_bytes`` to that path.
    ``tmp_path`` is accepted but not used by the helper.
    """
    export_target: dict = {}

    def _handle_statement(sql, *params):
        if "ATTACH DATABASE" in sql:
            # The path is the text between the first pair of single quotes.
            export_target["path"] = sql.split("'")[1]
            return
        if "sqlcipher_export" in sql:
            # Simulate the export by dropping the bytes at the attached path.
            Path(export_target["path"]).write_bytes(plain_bytes)

    fake_conn = MagicMock()
    fake_conn.execute.side_effect = _handle_statement
    fake_conn.close = MagicMock()
    return fake_conn
class TestCloudBackup:
    """Backup/restore with SQLCipher encryption — pysqlcipher3 mocked out."""

    def test_create_backup_decrypts_db_when_key_set(self, tmp_path):
        """With db_key, _decrypt_db_to_bytes is called and plain bytes go into zip."""
        base = _make_instance(tmp_path, "cloud-user")
        plain_db = b"SQLite format 3\x00plain-content"

        with patch("scripts.backup._decrypt_db_to_bytes", return_value=plain_db) as mock_dec:
            data = create_backup(base, include_db=True, db_key="testkey")

        mock_dec.assert_called_once()

        # The zip should contain the plain bytes, not the raw encrypted file.
        # Written to disk so zipfile can open it without extra imports.
        archive = tmp_path / "cloud-user-backup.zip"
        archive.write_bytes(data)
        with zipfile.ZipFile(archive) as zf:
            db_files = [n for n in zf.namelist() if n.endswith(".db")]
            assert len(db_files) == 1
            assert zf.read(db_files[0]) == plain_db

    def test_create_backup_no_key_reads_file_directly(self, tmp_path):
        """Without db_key, _decrypt_db_to_bytes is NOT called."""
        base = _make_instance(tmp_path, "local-user")

        with patch("scripts.backup._decrypt_db_to_bytes") as mock_dec:
            create_backup(base, include_db=True, db_key="")

        mock_dec.assert_not_called()

    def test_restore_backup_encrypts_db_when_key_set(self, tmp_path):
        """With db_key, _encrypt_db_from_bytes is called for .db files."""
        src = _make_instance(tmp_path, "cloud-src")
        dst = tmp_path / "cloud-dst"
        dst.mkdir()
        plain_db = b"SQLite format 3\x00plain-content"

        # Create a backup with plain DB bytes
        with patch("scripts.backup._decrypt_db_to_bytes", return_value=plain_db):
            data = create_backup(src, include_db=True, db_key="testkey")

        with patch("scripts.backup._encrypt_db_from_bytes") as mock_enc:
            restore_backup(data, dst, include_db=True, db_key="testkey")

        mock_enc.assert_called_once()
        call_args = mock_enc.call_args
        assert call_args[0][0] == plain_db  # plain_bytes
        assert call_args[0][2] == "testkey"  # db_key

    def test_restore_backup_no_key_writes_file_directly(self, tmp_path):
        """Without db_key, _encrypt_db_from_bytes is NOT called."""
        src = _make_instance(tmp_path, "local-src")
        dst = tmp_path / "local-dst"
        dst.mkdir()
        data = create_backup(src, include_db=True, db_key="")

        with patch("scripts.backup._encrypt_db_from_bytes") as mock_enc:
            restore_backup(data, dst, include_db=True, db_key="")

        mock_enc.assert_not_called()

    def test_decrypt_db_to_bytes_calls_sqlcipher(self, tmp_path):
        """_decrypt_db_to_bytes imports pysqlcipher3.dbapi2 and calls sqlcipher_export."""
        fake_db = tmp_path / "staging.db"
        fake_db.write_bytes(b"encrypted")
        plain_bytes = b"SQLite format 3\x00"

        mock_conn = _make_mock_sqlcipher_conn(plain_bytes, tmp_path)
        mock_module = MagicMock()
        mock_module.connect.return_value = mock_conn

        # Must set dbapi2 explicitly on the package mock so `from pysqlcipher3 import
        # dbapi2` resolves to mock_module (not a new auto-created MagicMock attr).
        mock_pkg = MagicMock()
        mock_pkg.dbapi2 = mock_module

        with patch.dict("sys.modules", {"pysqlcipher3": mock_pkg, "pysqlcipher3.dbapi2": mock_module}):
            result = _decrypt_db_to_bytes(fake_db, "testkey")

        mock_module.connect.assert_called_once_with(str(fake_db))
        assert result == plain_bytes

        # Verify what the docstring promises: the key was applied, the export
        # ran, and the connection was closed afterwards.
        executed = [call[0][0] for call in mock_conn.execute.call_args_list]
        assert "PRAGMA key='testkey'" in executed
        assert any("sqlcipher_export" in sql for sql in executed)
        mock_conn.close.assert_called_once()

    def test_encrypt_db_from_bytes_calls_sqlcipher(self, tmp_path):
        """_encrypt_db_from_bytes imports pysqlcipher3.dbapi2 and calls sqlcipher_export."""
        dest = tmp_path / "staging.db"
        plain_bytes = b"SQLite format 3\x00"

        mock_conn = MagicMock()
        mock_module = MagicMock()
        mock_module.connect.return_value = mock_conn

        mock_pkg = MagicMock()
        mock_pkg.dbapi2 = mock_module

        with patch.dict("sys.modules", {"pysqlcipher3": mock_pkg, "pysqlcipher3.dbapi2": mock_module}):
            _encrypt_db_from_bytes(plain_bytes, dest, "testkey")

        mock_module.connect.assert_called_once()

        # Verify ATTACH DATABASE call included the dest path and key
        attach_calls = [
            call for call in mock_conn.execute.call_args_list
            if "ATTACH DATABASE" in str(call)
        ]
        assert len(attach_calls) == 1
        assert str(dest) in str(attach_calls[0])
        assert "testkey" in str(attach_calls[0])

        # Verify the export itself ran and the connection was closed.
        export_calls = [
            call for call in mock_conn.execute.call_args_list
            if "sqlcipher_export" in str(call)
        ]
        assert len(export_calls) == 1
        mock_conn.close.assert_called_once()