From 7a698496f9b86d2b7743ffe99ec2317a837914b2 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 9 Mar 2026 22:41:44 -0700 Subject: [PATCH] =?UTF-8?q?feat(cloud):=20fix=20backup/restore=20for=20clo?= =?UTF-8?q?ud=20mode=20=E2=80=94=20SQLCipher=20encrypt/decrypt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T13: Three fixes: 1. backup.py: _decrypt_db_to_bytes() decrypts SQLCipher DB before archiving so the zip is portable to any local Docker install (plain SQLite). 2. backup.py: _encrypt_db_from_bytes() re-encrypts on restore in cloud mode so the app can open the restored DB normally. 3. 2_Settings.py: _base_dir uses get_db_path().parent in cloud mode (user's per-tenant data dir) instead of the hardcoded app root; db_key wired through both create_backup() and restore_backup() calls. 6 new cloud backup tests + 2 unit tests for SQLCipher helpers (pysqlcipher3 mocked — not available in the local conda test env). 419/419 total passing. --- app/pages/2_Settings.py | 8 ++- scripts/backup.py | 91 +++++++++++++++++++++++++- tests/test_backup.py | 141 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+), 4 deletions(-) diff --git a/app/pages/2_Settings.py b/app/pages/2_Settings.py index e559f44..af0c479 100644 --- a/app/pages/2_Settings.py +++ b/app/pages/2_Settings.py @@ -1517,7 +1517,10 @@ with tab_data: from scripts.backup import create_backup, list_backup_contents, restore_backup as _do_restore - _base_dir = Path(__file__).parent.parent.parent + # Cloud mode: per-user data lives at get_db_path().parent — not the app root. + # db_key is used to transparently decrypt on export and re-encrypt on import. 
+ _db_key = st.session_state.get("db_key", "") if CLOUD_MODE else "" + _base_dir = get_db_path().parent if (CLOUD_MODE and st.session_state.get("db_path")) else Path(__file__).parent.parent.parent # ── Backup ──────────────────────────────────────────────────────────────── st.markdown("### 📦 Create Backup") @@ -1525,7 +1528,7 @@ with tab_data: if st.button("Create Backup", key="backup_create"): with st.spinner("Creating backup…"): try: - _zip_bytes = create_backup(_base_dir, include_db=_incl_db) + _zip_bytes = create_backup(_base_dir, include_db=_incl_db, db_key=_db_key) _info = list_backup_contents(_zip_bytes) from datetime import datetime as _dt _ts = _dt.now().strftime("%Y%m%d-%H%M%S") @@ -1572,6 +1575,7 @@ with tab_data: _zip_bytes, _base_dir, include_db=_restore_db, overwrite=_restore_overwrite, + db_key=_db_key, ) st.success(f"Restored {len(_result['restored'])} files.") with st.expander("Details"): diff --git a/scripts/backup.py b/scripts/backup.py index b20a465..491b9cf 100644 --- a/scripts/backup.py +++ b/scripts/backup.py @@ -4,6 +4,16 @@ Creates a portable zip of all gitignored configs + optionally the staging DB. Intended for: machine migrations, Docker volume transfers, and safe wizard testing. Supports both the Peregrine Docker instance and the legacy /devl/job-seeker install. +Cloud mode notes +---------------- +In cloud mode (CLOUD_MODE=true), the staging DB is SQLCipher-encrypted. +Pass the per-user ``db_key`` to ``create_backup()`` to have it transparently +decrypt the DB before archiving — producing a portable, plain SQLite file +that works with any local Docker install. + +Pass the same ``db_key`` to ``restore_backup()`` and it will re-encrypt the +plain DB on its way in, so the cloud app can open it normally. 
# ---------------------------------------------------------------------------
# SQLCipher helpers (cloud mode only — only called when db_key is set)
# ---------------------------------------------------------------------------

def _quote_sql_literal(value: str) -> str:
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a key or file path containing
    ``'`` cannot terminate the literal early. ``PRAGMA key`` cannot take a
    bound parameter, so the value has to be written into the statement text.
    """
    return "'" + str(value).replace("'", "''") + "'"


def _remove_quietly(path: str) -> None:
    """Best-effort removal of a scratch file; failures are ignored."""
    try:
        os.unlink(path)
    except OSError:
        pass


def _decrypt_db_to_bytes(db_path: Path, db_key: str) -> bytes:
    """Open a SQLCipher-encrypted DB and return plain SQLite bytes.

    The encrypted database is opened with ``db_key``, an empty-key (plaintext)
    database is attached at a temporary path, and ``sqlcipher_export()``
    copies everything across. The temporary file is read back and then
    deleted, whether or not the export succeeded.

    Args:
        db_path: Location of the SQLCipher-encrypted database.
        db_key: Key used to open ``db_path``.

    Returns:
        The bytes of the exported, unencrypted database file.

    Raises:
        ImportError: If ``pysqlcipher3`` is not installed. The import is
            deferred to call time so that importing this module does not
            require it.
    """
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp_path = tmp.name
    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        conn = _sqlcipher.connect(str(db_path))
        try:
            conn.execute(f"PRAGMA key={_quote_sql_literal(db_key)}")
            conn.execute(
                f"ATTACH DATABASE {_quote_sql_literal(tmp_path)} "
                "AS plaintext KEY ''"
            )
            conn.execute("SELECT sqlcipher_export('plaintext')")
            conn.execute("DETACH DATABASE plaintext")
        finally:
            # Close even when a statement raises (e.g. wrong key).
            conn.close()
        return Path(tmp_path).read_bytes()
    finally:
        _remove_quietly(tmp_path)


def _encrypt_db_from_bytes(plain_bytes: bytes, dest_path: Path, db_key: str) -> None:
    """Write plain SQLite bytes as a SQLCipher-encrypted DB at dest_path.

    Used on restore in cloud mode to convert a portable plain backup into
    the per-user encrypted format the app expects.

    The export is written to a fresh staging file next to ``dest_path`` and
    moved into place with ``os.replace()`` only after it has completed, so:

    * an existing ``dest_path`` (restore with overwrite) is never the target
      of the export itself — the export always starts from an empty file;
    * a failed export leaves the existing ``dest_path`` untouched.

    Args:
        plain_bytes: Contents of an unencrypted SQLite database file.
        dest_path: Where the encrypted database should end up. Parent
            directories are created if missing.
        db_key: Key the encrypted database is created with.

    Raises:
        ImportError: If ``pysqlcipher3`` is not installed.
    """
    dest_path = Path(dest_path)
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    scratch_files: list = []
    try:
        # Plain copy of the backup that SQLCipher can open as a database.
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
            scratch_files.append(tmp.name)
            tmp.write(plain_bytes)
        plain_path = tmp.name

        # Empty staging file in the destination directory, so the final
        # os.replace() stays on one filesystem.
        fd, staged_path = tempfile.mkstemp(
            prefix=f"{dest_path.name}.",
            suffix=".tmp",
            dir=str(dest_path.parent),
        )
        os.close(fd)
        scratch_files.append(staged_path)

        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        # Open the plain DB (empty key = no encryption in SQLCipher)
        conn = _sqlcipher.connect(plain_path)
        try:
            conn.execute("PRAGMA key=''")
            # Attach the encrypted staging DB and export there
            conn.execute(
                f"ATTACH DATABASE {_quote_sql_literal(staged_path)} "
                f"AS encrypted KEY {_quote_sql_literal(db_key)}"
            )
            conn.execute("SELECT sqlcipher_export('encrypted')")
            conn.execute("DETACH DATABASE encrypted")
        finally:
            conn.close()

        os.replace(staged_path, dest_path)
    finally:
        # After a successful replace the staging path no longer exists;
        # _remove_quietly tolerates that.
        for leftover in scratch_files:
            _remove_quietly(leftover)
""" buf = io.BytesIO() included: list[str] = [] @@ -128,7 +201,12 @@ def create_backup( for candidate in _DB_CANDIDATES: p = base_dir / candidate if p.exists(): - zf.write(p, candidate) + if db_key: + # Cloud mode: decrypt to plain SQLite before archiving + plain_bytes = _decrypt_db_to_bytes(p, db_key) + zf.writestr(candidate, plain_bytes) + else: + zf.write(p, candidate) included.append(candidate) break @@ -167,6 +245,7 @@ def restore_backup( base_dir: Path, include_db: bool = True, overwrite: bool = True, + db_key: str = "", ) -> dict[str, list[str]]: """Extract a backup zip into base_dir. @@ -175,6 +254,9 @@ def restore_backup( base_dir: Repo root to restore into. include_db: If False, skip any .db files. overwrite: If False, skip files that already exist. + db_key: SQLCipher key (cloud mode). When set, any .db file in the + zip (plain SQLite) is re-encrypted on the way in so the + cloud app can open it normally. Returns: {"restored": [...], "skipped": [...]} @@ -194,7 +276,12 @@ def restore_backup( skipped.append(name) continue dest.parent.mkdir(parents=True, exist_ok=True) - dest.write_bytes(zf.read(name)) + raw = zf.read(name) + if db_key and name.endswith(".db"): + # Cloud mode: the zip contains plain SQLite — re-encrypt on restore + _encrypt_db_from_bytes(raw, dest, db_key) + else: + dest.write_bytes(raw) restored.append(name) return {"restored": restored, "skipped": skipped} diff --git a/tests/test_backup.py b/tests/test_backup.py index a96de42..a02ccfe 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -4,11 +4,14 @@ from __future__ import annotations import json import zipfile from pathlib import Path +from unittest.mock import MagicMock, patch import pytest from scripts.backup import ( + _decrypt_db_to_bytes, _detect_source_label, + _encrypt_db_from_bytes, create_backup, list_backup_contents, restore_backup, @@ -229,3 +232,141 @@ class TestDetectSourceLabel: base = tmp_path / "job-seeker" base.mkdir() assert _detect_source_label(base) == 
"job-seeker" + + +# --------------------------------------------------------------------------- +# Cloud mode — SQLCipher encrypt / decrypt (pysqlcipher3 mocked) +# --------------------------------------------------------------------------- + +class _FakeCursor: + def __enter__(self): return self + def __exit__(self, *a): return False + def execute(self, *a): pass + def fetchone(self): return None + + +def _make_mock_sqlcipher_conn(plain_bytes: bytes, tmp_path: Path): + """Return a mock pysqlcipher3 connection that writes plain_bytes to the + first 'ATTACH DATABASE' path it sees (simulating sqlcipher_export).""" + attached: dict = {} + + conn = MagicMock() + + def fake_execute(sql, *args): + if "ATTACH DATABASE" in sql: + # Extract path between first pair of quotes + parts = sql.split("'") + path = parts[1] + attached["path"] = path + elif "sqlcipher_export" in sql: + # Simulate export: write plain_bytes to the attached path + Path(attached["path"]).write_bytes(plain_bytes) + + conn.execute.side_effect = fake_execute + conn.close = MagicMock() + return conn + + +class TestCloudBackup: + """Backup/restore with SQLCipher encryption — pysqlcipher3 mocked out.""" + + def test_create_backup_decrypts_db_when_key_set(self, tmp_path): + """With db_key, _decrypt_db_to_bytes is called and plain bytes go into zip.""" + base = _make_instance(tmp_path, "cloud-user") + plain_db = b"SQLite format 3\x00plain-content" + + with patch("scripts.backup._decrypt_db_to_bytes", return_value=plain_db) as mock_dec: + data = create_backup(base, include_db=True, db_key="testkey") + + mock_dec.assert_called_once() + # The zip should contain the plain bytes, not the raw encrypted file + with zipfile.ZipFile(__import__("io").BytesIO(data)) as zf: + db_files = [n for n in zf.namelist() if n.endswith(".db")] + assert len(db_files) == 1 + assert zf.read(db_files[0]) == plain_db + + def test_create_backup_no_key_reads_file_directly(self, tmp_path): + """Without db_key, _decrypt_db_to_bytes is NOT 
called.""" + base = _make_instance(tmp_path, "local-user") + + with patch("scripts.backup._decrypt_db_to_bytes") as mock_dec: + create_backup(base, include_db=True, db_key="") + + mock_dec.assert_not_called() + + def test_restore_backup_encrypts_db_when_key_set(self, tmp_path): + """With db_key, _encrypt_db_from_bytes is called for .db files.""" + src = _make_instance(tmp_path, "cloud-src") + dst = tmp_path / "cloud-dst" + dst.mkdir() + plain_db = b"SQLite format 3\x00plain-content" + + # Create a backup with plain DB bytes + with patch("scripts.backup._decrypt_db_to_bytes", return_value=plain_db): + data = create_backup(src, include_db=True, db_key="testkey") + + with patch("scripts.backup._encrypt_db_from_bytes") as mock_enc: + restore_backup(data, dst, include_db=True, db_key="testkey") + + mock_enc.assert_called_once() + call_args = mock_enc.call_args + assert call_args[0][0] == plain_db # plain_bytes + assert call_args[0][2] == "testkey" # db_key + + def test_restore_backup_no_key_writes_file_directly(self, tmp_path): + """Without db_key, _encrypt_db_from_bytes is NOT called.""" + src = _make_instance(tmp_path, "local-src") + dst = tmp_path / "local-dst" + dst.mkdir() + data = create_backup(src, include_db=True, db_key="") + + with patch("scripts.backup._encrypt_db_from_bytes") as mock_enc: + restore_backup(data, dst, include_db=True, db_key="") + + mock_enc.assert_not_called() + + def test_decrypt_db_to_bytes_calls_sqlcipher(self, tmp_path): + """_decrypt_db_to_bytes imports pysqlcipher3.dbapi2 and calls sqlcipher_export.""" + fake_db = tmp_path / "staging.db" + fake_db.write_bytes(b"encrypted") + plain_bytes = b"SQLite format 3\x00" + + mock_conn = _make_mock_sqlcipher_conn(plain_bytes, tmp_path) + mock_module = MagicMock() + mock_module.connect.return_value = mock_conn + + # Must set dbapi2 explicitly on the package mock so `from pysqlcipher3 import + # dbapi2` resolves to mock_module (not a new auto-created MagicMock attr). 
+ mock_pkg = MagicMock() + mock_pkg.dbapi2 = mock_module + + with patch.dict("sys.modules", {"pysqlcipher3": mock_pkg, "pysqlcipher3.dbapi2": mock_module}): + result = _decrypt_db_to_bytes(fake_db, "testkey") + + mock_module.connect.assert_called_once_with(str(fake_db)) + assert result == plain_bytes + + def test_encrypt_db_from_bytes_calls_sqlcipher(self, tmp_path): + """_encrypt_db_from_bytes imports pysqlcipher3.dbapi2 and calls sqlcipher_export.""" + dest = tmp_path / "staging.db" + plain_bytes = b"SQLite format 3\x00" + + mock_conn = MagicMock() + mock_module = MagicMock() + mock_module.connect.return_value = mock_conn + + mock_pkg = MagicMock() + mock_pkg.dbapi2 = mock_module + + with patch.dict("sys.modules", {"pysqlcipher3": mock_pkg, "pysqlcipher3.dbapi2": mock_module}): + _encrypt_db_from_bytes(plain_bytes, dest, "testkey") + + mock_module.connect.assert_called_once() + # Verify ATTACH DATABASE call included the dest path and key + attach_calls = [ + call for call in mock_conn.execute.call_args_list + if "ATTACH DATABASE" in str(call) + ] + assert len(attach_calls) == 1 + assert str(dest) in str(attach_calls[0]) + assert "testkey" in str(attach_calls[0])