feat(cloud): fix backup/restore for cloud mode — SQLCipher encrypt/decrypt
T13: Three fixes: 1. backup.py: _decrypt_db_to_bytes() decrypts SQLCipher DB before archiving so the zip is portable to any local Docker install (plain SQLite). 2. backup.py: _encrypt_db_from_bytes() re-encrypts on restore in cloud mode so the app can open the restored DB normally. 3. 2_Settings.py: _base_dir uses get_db_path().parent in cloud mode (user's per-tenant data dir) instead of the hardcoded app root; db_key wired through both create_backup() and restore_backup() calls. 6 new cloud backup tests + 2 unit tests for SQLCipher helpers (pysqlcipher3 mocked — not available in the local conda test env). 419/419 total passing.
This commit is contained in:
parent
441e4ce4ef
commit
7a698496f9
3 changed files with 236 additions and 4 deletions
|
|
@ -1517,7 +1517,10 @@ with tab_data:
|
|||
|
||||
from scripts.backup import create_backup, list_backup_contents, restore_backup as _do_restore
|
||||
|
||||
_base_dir = Path(__file__).parent.parent.parent
|
||||
# Cloud mode: per-user data lives at get_db_path().parent — not the app root.
|
||||
# db_key is used to transparently decrypt on export and re-encrypt on import.
|
||||
_db_key = st.session_state.get("db_key", "") if CLOUD_MODE else ""
|
||||
_base_dir = get_db_path().parent if (CLOUD_MODE and st.session_state.get("db_path")) else Path(__file__).parent.parent.parent
|
||||
|
||||
# ── Backup ────────────────────────────────────────────────────────────────
|
||||
st.markdown("### 📦 Create Backup")
|
||||
|
|
@ -1525,7 +1528,7 @@ with tab_data:
|
|||
if st.button("Create Backup", key="backup_create"):
|
||||
with st.spinner("Creating backup…"):
|
||||
try:
|
||||
_zip_bytes = create_backup(_base_dir, include_db=_incl_db)
|
||||
_zip_bytes = create_backup(_base_dir, include_db=_incl_db, db_key=_db_key)
|
||||
_info = list_backup_contents(_zip_bytes)
|
||||
from datetime import datetime as _dt
|
||||
_ts = _dt.now().strftime("%Y%m%d-%H%M%S")
|
||||
|
|
@ -1572,6 +1575,7 @@ with tab_data:
|
|||
_zip_bytes, _base_dir,
|
||||
include_db=_restore_db,
|
||||
overwrite=_restore_overwrite,
|
||||
db_key=_db_key,
|
||||
)
|
||||
st.success(f"Restored {len(_result['restored'])} files.")
|
||||
with st.expander("Details"):
|
||||
|
|
|
|||
|
|
@ -4,6 +4,16 @@ Creates a portable zip of all gitignored configs + optionally the staging DB.
|
|||
Intended for: machine migrations, Docker volume transfers, and safe wizard testing.
|
||||
Supports both the Peregrine Docker instance and the legacy /devl/job-seeker install.
|
||||
|
||||
Cloud mode notes
|
||||
----------------
|
||||
In cloud mode (CLOUD_MODE=true), the staging DB is SQLCipher-encrypted.
|
||||
Pass the per-user ``db_key`` to ``create_backup()`` to have it transparently
|
||||
decrypt the DB before archiving — producing a portable, plain SQLite file
|
||||
that works with any local Docker install.
|
||||
|
||||
Pass the same ``db_key`` to ``restore_backup()`` and it will re-encrypt the
|
||||
plain DB on its way in, so the cloud app can open it normally.
|
||||
|
||||
Usage (CLI):
|
||||
conda run -n job-seeker python scripts/backup.py --create backup.zip
|
||||
conda run -n job-seeker python scripts/backup.py --create backup.zip --no-db
|
||||
|
|
@ -21,6 +31,8 @@ from __future__ import annotations
|
|||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
|
@ -62,6 +74,63 @@ _DB_CANDIDATES = ["data/staging.db", "staging.db"]
|
|||
_MANIFEST_NAME = "backup-manifest.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQLCipher helpers (cloud mode only — only called when db_key is set)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _decrypt_db_to_bytes(db_path: Path, db_key: str) -> bytes:
    """Open a SQLCipher-encrypted DB and return plain SQLite bytes.

    Uses SQLCipher's ATTACH + sqlcipher_export() to write an unencrypted
    copy into a temporary file, reads that file back, and removes it.
    Only called in cloud mode (db_key non-empty).

    Args:
        db_path: Path to the SQLCipher-encrypted database file.
        db_key: Passphrase used to open ``db_path``.

    Returns:
        The raw bytes of the exported, unencrypted database file.

    Raises:
        ImportError: If pysqlcipher3 is not installed.
        Exception: Any error raised by the pysqlcipher3 driver is propagated
            after the connection has been closed and the temp file removed.
    """
    # Reserve a temp file name; SQLCipher writes the plaintext export into it.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp_path = tmp.name
    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        # Both values end up inside single-quoted SQL string literals, so any
        # embedded single quote must be doubled or the statement is malformed.
        quoted_key = db_key.replace("'", "''")
        quoted_tmp = tmp_path.replace("'", "''")

        conn = _sqlcipher.connect(str(db_path))
        try:
            conn.execute(f"PRAGMA key='{quoted_key}'")
            # Empty KEY means the attached database is written unencrypted.
            conn.execute(f"ATTACH DATABASE '{quoted_tmp}' AS plaintext KEY ''")
            conn.execute("SELECT sqlcipher_export('plaintext')")
            conn.execute("DETACH DATABASE plaintext")
        finally:
            # Close even when a statement fails (e.g. wrong key) so the
            # handle on the user's database is never leaked.
            conn.close()
        return Path(tmp_path).read_bytes()
    finally:
        # Best-effort removal of the plaintext copy.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
|
||||
|
||||
|
||||
def _encrypt_db_from_bytes(plain_bytes: bytes, dest_path: Path, db_key: str) -> None:
    """Write plain SQLite bytes as a SQLCipher-encrypted DB at dest_path.

    Used on restore in cloud mode to convert a portable plain backup into
    the per-user encrypted format the app expects.

    If ``dest_path`` already exists it is moved aside first, so that
    sqlcipher_export() always writes into a fresh database instead of
    colliding with the schema of the file being replaced. On failure the
    previous file is put back; on success it is deleted.

    Args:
        plain_bytes: Contents of an unencrypted SQLite database file.
        dest_path: Where the encrypted database is written. Parent
            directories are created if missing.
        db_key: Passphrase used to encrypt the new database.

    Raises:
        ImportError: If pysqlcipher3 is not installed.
        Exception: Any error raised by the pysqlcipher3 driver is propagated
            after the connection is closed and the previous file restored.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # Stage the plain database on disk so SQLCipher can open it.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp.write(plain_bytes)
        tmp_path = tmp.name

    aside_path = dest_path.with_name(dest_path.name + ".pre-restore")
    moved_aside = False
    try:
        from pysqlcipher3 import dbapi2 as _sqlcipher  # type: ignore[import]

        if dest_path.exists():
            # Keep the current DB until the export has succeeded.
            os.replace(dest_path, aside_path)
            moved_aside = True

        # Both values end up inside single-quoted SQL string literals, so any
        # embedded single quote must be doubled or the statement is malformed.
        quoted_dest = str(dest_path).replace("'", "''")
        quoted_key = db_key.replace("'", "''")

        # Open the plain DB (empty key = no encryption in SQLCipher)
        conn = _sqlcipher.connect(tmp_path)
        try:
            conn.execute("PRAGMA key=''")
            # Attach the encrypted destination and export there
            conn.execute(
                f"ATTACH DATABASE '{quoted_dest}' AS encrypted KEY '{quoted_key}'"
            )
            conn.execute("SELECT sqlcipher_export('encrypted')")
            conn.execute("DETACH DATABASE encrypted")
        finally:
            # Close even when a statement fails so no handle is leaked.
            conn.close()
    except BaseException:
        if moved_aside:
            # Drop any partially written export, then put the old DB back.
            try:
                os.unlink(dest_path)
            except OSError:
                pass
            os.replace(aside_path, dest_path)
        raise
    else:
        if moved_aside:
            # Export succeeded; the previous DB is no longer needed.
            try:
                os.unlink(aside_path)
            except OSError:
                pass
    finally:
        # Best-effort removal of the staged plaintext copy.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -90,6 +159,7 @@ def create_backup(
|
|||
base_dir: Path,
|
||||
include_db: bool = True,
|
||||
source_label: str | None = None,
|
||||
db_key: str = "",
|
||||
) -> bytes:
|
||||
"""Return a zip archive as raw bytes.
|
||||
|
||||
|
|
@ -98,6 +168,9 @@ def create_backup(
|
|||
include_db: If True, include staging.db in the archive.
|
||||
source_label: Human-readable instance name stored in the manifest
|
||||
(e.g. "peregrine", "job-seeker"). Auto-detected if None.
|
||||
db_key: SQLCipher key for the DB (cloud mode). When set, the DB
|
||||
is decrypted before archiving so the backup is portable
|
||||
to any local Docker install.
|
||||
"""
|
||||
buf = io.BytesIO()
|
||||
included: list[str] = []
|
||||
|
|
@ -128,7 +201,12 @@ def create_backup(
|
|||
for candidate in _DB_CANDIDATES:
|
||||
p = base_dir / candidate
|
||||
if p.exists():
|
||||
zf.write(p, candidate)
|
||||
if db_key:
|
||||
# Cloud mode: decrypt to plain SQLite before archiving
|
||||
plain_bytes = _decrypt_db_to_bytes(p, db_key)
|
||||
zf.writestr(candidate, plain_bytes)
|
||||
else:
|
||||
zf.write(p, candidate)
|
||||
included.append(candidate)
|
||||
break
|
||||
|
||||
|
|
@ -167,6 +245,7 @@ def restore_backup(
|
|||
base_dir: Path,
|
||||
include_db: bool = True,
|
||||
overwrite: bool = True,
|
||||
db_key: str = "",
|
||||
) -> dict[str, list[str]]:
|
||||
"""Extract a backup zip into base_dir.
|
||||
|
||||
|
|
@ -175,6 +254,9 @@ def restore_backup(
|
|||
base_dir: Repo root to restore into.
|
||||
include_db: If False, skip any .db files.
|
||||
overwrite: If False, skip files that already exist.
|
||||
db_key: SQLCipher key (cloud mode). When set, any .db file in the
|
||||
zip (plain SQLite) is re-encrypted on the way in so the
|
||||
cloud app can open it normally.
|
||||
|
||||
Returns:
|
||||
{"restored": [...], "skipped": [...]}
|
||||
|
|
@ -194,7 +276,12 @@ def restore_backup(
|
|||
skipped.append(name)
|
||||
continue
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
dest.write_bytes(zf.read(name))
|
||||
raw = zf.read(name)
|
||||
if db_key and name.endswith(".db"):
|
||||
# Cloud mode: the zip contains plain SQLite — re-encrypt on restore
|
||||
_encrypt_db_from_bytes(raw, dest, db_key)
|
||||
else:
|
||||
dest.write_bytes(raw)
|
||||
restored.append(name)
|
||||
|
||||
return {"restored": restored, "skipped": skipped}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,14 @@ from __future__ import annotations
|
|||
import json
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from scripts.backup import (
|
||||
_decrypt_db_to_bytes,
|
||||
_detect_source_label,
|
||||
_encrypt_db_from_bytes,
|
||||
create_backup,
|
||||
list_backup_contents,
|
||||
restore_backup,
|
||||
|
|
@ -229,3 +232,141 @@ class TestDetectSourceLabel:
|
|||
base = tmp_path / "job-seeker"
|
||||
base.mkdir()
|
||||
assert _detect_source_label(base) == "job-seeker"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cloud mode — SQLCipher encrypt / decrypt (pysqlcipher3 mocked)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _FakeCursor:
|
||||
def __enter__(self): return self
|
||||
def __exit__(self, *a): return False
|
||||
def execute(self, *a): pass
|
||||
def fetchone(self): return None
|
||||
|
||||
|
||||
def _make_mock_sqlcipher_conn(plain_bytes: bytes, tmp_path: Path):
|
||||
"""Return a mock pysqlcipher3 connection that writes plain_bytes to the
|
||||
first 'ATTACH DATABASE' path it sees (simulating sqlcipher_export)."""
|
||||
attached: dict = {}
|
||||
|
||||
conn = MagicMock()
|
||||
|
||||
def fake_execute(sql, *args):
|
||||
if "ATTACH DATABASE" in sql:
|
||||
# Extract path between first pair of quotes
|
||||
parts = sql.split("'")
|
||||
path = parts[1]
|
||||
attached["path"] = path
|
||||
elif "sqlcipher_export" in sql:
|
||||
# Simulate export: write plain_bytes to the attached path
|
||||
Path(attached["path"]).write_bytes(plain_bytes)
|
||||
|
||||
conn.execute.side_effect = fake_execute
|
||||
conn.close = MagicMock()
|
||||
return conn
|
||||
|
||||
|
||||
class TestCloudBackup:
    """Cloud-mode backup/restore paths, with pysqlcipher3 replaced by mocks."""

    def test_create_backup_decrypts_db_when_key_set(self, tmp_path):
        """A non-empty db_key routes the DB through _decrypt_db_to_bytes."""
        import io

        instance_dir = _make_instance(tmp_path, "cloud-user")
        expected = b"SQLite format 3\x00plain-content"

        with patch(
            "scripts.backup._decrypt_db_to_bytes", return_value=expected
        ) as decrypt_stub:
            archive = create_backup(instance_dir, include_db=True, db_key="testkey")

        decrypt_stub.assert_called_once()
        # The archive must hold the decrypted bytes, not the file on disk.
        with zipfile.ZipFile(io.BytesIO(archive)) as zf:
            db_members = [member for member in zf.namelist() if member.endswith(".db")]
            assert len(db_members) == 1
            assert zf.read(db_members[0]) == expected

    def test_create_backup_no_key_reads_file_directly(self, tmp_path):
        """An empty db_key never touches _decrypt_db_to_bytes."""
        instance_dir = _make_instance(tmp_path, "local-user")

        with patch("scripts.backup._decrypt_db_to_bytes") as decrypt_stub:
            create_backup(instance_dir, include_db=True, db_key="")

        decrypt_stub.assert_not_called()

    def test_restore_backup_encrypts_db_when_key_set(self, tmp_path):
        """A non-empty db_key routes restored .db files through _encrypt_db_from_bytes."""
        source_dir = _make_instance(tmp_path, "cloud-src")
        target_dir = tmp_path / "cloud-dst"
        target_dir.mkdir()
        expected = b"SQLite format 3\x00plain-content"

        # Build an archive whose DB member is the known plain payload.
        with patch("scripts.backup._decrypt_db_to_bytes", return_value=expected):
            archive = create_backup(source_dir, include_db=True, db_key="testkey")

        with patch("scripts.backup._encrypt_db_from_bytes") as encrypt_stub:
            restore_backup(archive, target_dir, include_db=True, db_key="testkey")

        encrypt_stub.assert_called_once()
        positional = encrypt_stub.call_args[0]
        assert positional[0] == expected  # plain_bytes
        assert positional[2] == "testkey"  # db_key

    def test_restore_backup_no_key_writes_file_directly(self, tmp_path):
        """An empty db_key never touches _encrypt_db_from_bytes."""
        source_dir = _make_instance(tmp_path, "local-src")
        target_dir = tmp_path / "local-dst"
        target_dir.mkdir()
        archive = create_backup(source_dir, include_db=True, db_key="")

        with patch("scripts.backup._encrypt_db_from_bytes") as encrypt_stub:
            restore_backup(archive, target_dir, include_db=True, db_key="")

        encrypt_stub.assert_not_called()

    def test_decrypt_db_to_bytes_calls_sqlcipher(self, tmp_path):
        """_decrypt_db_to_bytes connects via pysqlcipher3.dbapi2 and returns the export."""
        encrypted_file = tmp_path / "staging.db"
        encrypted_file.write_bytes(b"encrypted")
        expected = b"SQLite format 3\x00"

        dbapi_stub = MagicMock()
        dbapi_stub.connect.return_value = _make_mock_sqlcipher_conn(expected, tmp_path)

        # Pin dbapi2 on the package mock explicitly; otherwise
        # `from pysqlcipher3 import dbapi2` would get a fresh auto-created
        # MagicMock attribute instead of dbapi_stub.
        package_stub = MagicMock()
        package_stub.dbapi2 = dbapi_stub

        fake_modules = {"pysqlcipher3": package_stub, "pysqlcipher3.dbapi2": dbapi_stub}
        with patch.dict("sys.modules", fake_modules):
            produced = _decrypt_db_to_bytes(encrypted_file, "testkey")

        dbapi_stub.connect.assert_called_once_with(str(encrypted_file))
        assert produced == expected

    def test_encrypt_db_from_bytes_calls_sqlcipher(self, tmp_path):
        """_encrypt_db_from_bytes connects via pysqlcipher3.dbapi2 and attaches dest with the key."""
        target_file = tmp_path / "staging.db"
        payload = b"SQLite format 3\x00"

        conn_stub = MagicMock()
        dbapi_stub = MagicMock()
        dbapi_stub.connect.return_value = conn_stub

        package_stub = MagicMock()
        package_stub.dbapi2 = dbapi_stub

        fake_modules = {"pysqlcipher3": package_stub, "pysqlcipher3.dbapi2": dbapi_stub}
        with patch.dict("sys.modules", fake_modules):
            _encrypt_db_from_bytes(payload, target_file, "testkey")

        dbapi_stub.connect.assert_called_once()
        # Exactly one ATTACH statement, naming both the destination and the key.
        attach_calls = [
            recorded
            for recorded in conn_stub.execute.call_args_list
            if "ATTACH DATABASE" in str(recorded)
        ]
        assert len(attach_calls) == 1
        attach_repr = str(attach_calls[0])
        assert str(target_file) in attach_repr
        assert "testkey" in attach_repr
|
||||
|
|
|
|||
Loading…
Reference in a new issue