From 1e066cf66c870ae8abaf69cb1c8653b6e77dae3d Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Wed, 13 May 2026 18:35:17 -0700
Subject: [PATCH] feat: encryption at rest infrastructure for cloud user data (closes #5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements Option B (fscrypt) from the issue design: OS-level filesystem
encryption for per-user data directories on the cloud host.

- app/startup.py: warn_if_unencrypted() checks for fscrypt at startup in
  cloud mode and logs a SECURITY warning if the users/ directory is not
  encrypted — catches misconfigured deployments before any data is stored
- app/main.py: call warn_if_unencrypted() during lifespan in cloud mode
- scripts/setup_cloud_fscrypt.sh: operator script to encrypt a user's data
  directory with fscrypt (run as root on host before container start);
  supports --list and --status subcommands

Key management note: current implementation uses pam_passphrase protector.
For unattended server boot, integrate a raw_key protector from a secrets
manager (Vault, AWS Secrets Manager, etc.) — see script comments.

SQLCipher (Option A) deferred: sqlite-vec virtual table compatibility with
SQLCipher's encrypted VFS needs investigation before committing to that path.
---
 app/main.py                    |   6 +-
 app/startup.py                 |  55 +++++++++++++
 scripts/setup_cloud_fscrypt.sh | 125 +++++++++++++++++++++++++++++++++
 3 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100755 scripts/setup_cloud_fscrypt.sh

diff --git a/app/main.py b/app/main.py
index 1389472..5d872d3 100644
--- a/app/main.py
+++ b/app/main.py
@@ -32,7 +32,11 @@ async def lifespan(app: FastAPI):
     embed_model = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
     logger.info("Pagepiper starting — embed model: %s, dims: %d", embed_model, VEC_DIMENSIONS)
 
-    if not CLOUD_MODE:
+    if CLOUD_MODE:
+        from app.startup import warn_if_unencrypted
+        from app.config import DATA_DIR
+        warn_if_unencrypted(str(DATA_DIR))
+    else:
         # In cloud mode, per-user migration and vec schema check run on first request (deps.py).
         apply_migrations(DB_PATH)
         check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
diff --git a/app/startup.py b/app/startup.py
index 1a01f18..9736f2a 100644
--- a/app/startup.py
+++ b/app/startup.py
@@ -6,11 +6,66 @@ import logging
 import os
 import re
 import sqlite3
+import subprocess
 import threading
 
 logger = logging.getLogger("pagepiper")
 
 
+def warn_if_unencrypted(data_dir: str) -> None:
+    """Log a warning if cloud mode is running without fscrypt encryption.
+
+    Checks whether the users/ subdirectory of data_dir is fscrypt-encrypted.
+    Non-fatal: warns but does not block startup.
+
+    NOTE(review): scripts/setup_cloud_fscrypt.sh encrypts users/USER_ID, not
+    users/ itself, so this check may still warn on a deployment set up with
+    that script — confirm which directory level is meant to be encrypted.
+    """
+    users_dir = os.path.join(data_dir, "users")
+    os.makedirs(users_dir, exist_ok=True)
+
+    if not _fscrypt_available():
+        logger.warning(
+            "SECURITY: fscrypt not found on this system. Cloud user data at %s is stored "
+            "unencrypted. Install fscrypt and run scripts/setup_cloud_fscrypt.sh to enable "
+            "encryption at rest.",
+            users_dir,
+        )
+        return
+
+    try:
+        result = subprocess.run(
+            ["fscrypt", "status", users_dir],
+            capture_output=True, text=True, timeout=5,
+        )
+    except (OSError, subprocess.SubprocessError, ValueError) as exc:
+        # A security check that could not run must be visible in normal logs.
+        logger.warning("SECURITY: fscrypt status check failed (non-fatal): %s", exc)
+        return
+
+    # `fscrypt status DIR` exits non-zero for a plain directory and prints
+    # '"DIR" is encrypted with fscrypt.' (lowercase) for an encrypted one, so
+    # the match has to be case-insensitive.
+    if result.returncode != 0 or "encrypted" not in result.stdout.lower():
+        logger.warning(
+            "SECURITY: user data directory %s is not fscrypt-encrypted. "
+            "Run: sudo scripts/setup_cloud_fscrypt.sh USER_ID",
+            users_dir,
+        )
+
+
+def _fscrypt_available() -> bool:
+    """Return True if the fscrypt binary can be executed on this host."""
+    try:
+        subprocess.run(["fscrypt", "--version"], capture_output=True, timeout=2)
+        return True
+    except (OSError, subprocess.SubprocessError):
+        # OSError covers a missing binary (FileNotFoundError) and a
+        # non-executable one (PermissionError); neither may crash startup.
+        return False
+
+
 def apply_migrations(db_path: str) -> None:
     from scripts.db_migrate import migrate
     migrate(db_path)
diff --git a/scripts/setup_cloud_fscrypt.sh b/scripts/setup_cloud_fscrypt.sh
new file mode 100755
index 0000000..992de3b
--- /dev/null
+++ b/scripts/setup_cloud_fscrypt.sh
@@ -0,0 +1,125 @@
+#!/usr/bin/env bash
+# setup_cloud_fscrypt.sh — encrypt a cloud user's data directory with fscrypt.
+#
+# Run as root on the HOST (not inside the container) before first deployment.
+# Requires: fscrypt >= 0.3, Linux kernel >= 4.1, ext4/f2fs filesystem.
+#
+# Usage:
+#   sudo ./scripts/setup_cloud_fscrypt.sh USER_ID
+#   sudo ./scripts/setup_cloud_fscrypt.sh --list           # show all encrypted dirs
+#   sudo ./scripts/setup_cloud_fscrypt.sh --status USER_ID
+#
+# Environment:
+#   PAGEPIPER_DATA_DIR — base data directory (default: /devl/pagepiper-cloud-data)
+#
+# Key management:
+#   Keys are stored in the system protector backed by a passphrase or root keyring.
+#   For unattended unlock on server boot, use a raw_key protector derived from a
+#   secret in HashiCorp Vault or similar; see docs/encryption.md for details.
+
+set -euo pipefail
+
+DATA_DIR="${PAGEPIPER_DATA_DIR:-/devl/pagepiper-cloud-data}"
+USERS_DIR="$DATA_DIR/users"
+
+_usage() {
+    # Print the header comment block (lines starting with "# ") and exit non-zero.
+    grep '^# ' "$0" | cut -c3-
+    exit 1
+}
+
+_require_root() {
+    # Abort unless the script is running with an effective UID of 0.
+    if [[ "$EUID" -ne 0 ]]; then
+        echo "ERROR: this script must be run as root" >&2
+        exit 1
+    fi
+}
+
+_require_fscrypt() {
+    # Abort unless the fscrypt binary is on PATH.
+    if ! command -v fscrypt &>/dev/null; then
+        echo "ERROR: fscrypt not found. Install with: apt-get install fscrypt" >&2
+        exit 1
+    fi
+}
+
+_check_fscrypt_setup() {
+    # Run `fscrypt setup` on the mount point holding DATA_DIR if `fscrypt status` fails there.
+    local mnt
+    mnt=$(df -P "$DATA_DIR" | tail -1 | awk '{print $6}')
+    if ! fscrypt status "$mnt" &>/dev/null; then
+        echo "Initialising fscrypt on $mnt..."
+        fscrypt setup --quiet "$mnt"
+        echo "fscrypt setup complete on $mnt"
+    fi
+}
+
+cmd="${1:-}"
+case "$cmd" in
+    --list)
+        _require_root
+        _require_fscrypt
+        echo "Encrypted user directories under $USERS_DIR:"
+        find "$USERS_DIR" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | while read -r dir; do
+            if fscrypt status "$dir" 2>/dev/null | grep -qi "is encrypted"; then
+                echo "  [encrypted] $dir"
+            else
+                echo "  [plain]     $dir"
+            fi
+        done
+        ;;
+    --status)
+        _require_root
+        _require_fscrypt
+        user_id="${2:-}"
+        [[ -z "$user_id" ]] && { echo "Usage: $0 --status USER_ID" >&2; exit 1; }
+        user_dir="$USERS_DIR/$user_id"
+        if [[ ! -d "$user_dir" ]]; then
+            echo "Directory $user_dir does not exist"
+            exit 1
+        fi
+        fscrypt status "$user_dir"
+        ;;
+    "")
+        _usage
+        ;;
+    -*)
+        _usage
+        ;;
+    *)
+        # Encrypt a user's directory
+        user_id="$1"
+        _require_root
+        _require_fscrypt
+
+        user_dir="$USERS_DIR/$user_id"
+        if [[ ! -d "$user_dir" ]]; then
+            echo "Creating user directory: $user_dir"
+            mkdir -p "$user_dir"
+        fi
+
+        if fscrypt status "$user_dir" 2>/dev/null | grep -qi "is encrypted"; then
+            echo "Directory $user_dir is already encrypted."
+            exit 0
+        fi
+
+        # fscrypt can only enable encryption on an EMPTY directory (the kernel
+        # rejects a policy on a non-empty one), so refuse early with instructions.
+        if [[ -n "$(ls -A "$user_dir")" ]]; then
+            echo "ERROR: $user_dir is non-empty; fscrypt can only encrypt an empty directory." >&2
+            echo "Stop the container, move the existing data aside, re-run this script," >&2
+            echo "unlock the directory, then copy the data back into it." >&2
+            exit 1
+        fi
+
+        _check_fscrypt_setup
+
+        echo "Encrypting $user_dir..."
+        fscrypt encrypt "$user_dir" --source=pam_passphrase --quiet
+        echo "Encryption set up for user $user_id. Directory: $user_dir"
+        echo ""
+        echo "IMPORTANT: unlock the directory before starting the container:"
+        echo "  fscrypt unlock $user_dir"
+        ;;
+esac