feat: encryption at rest infrastructure for cloud user data (closes #5)

Implements Option B (fscrypt) from the issue design: OS-level filesystem
encryption for per-user data directories on the cloud host.

- app/startup.py: warn_if_unencrypted() checks for fscrypt at startup in
  cloud mode and logs a SECURITY warning if the users/ directory is not
  encrypted — catches misconfigured deployments before any data is stored
- app/main.py: call warn_if_unencrypted() during lifespan in cloud mode
- scripts/setup_cloud_fscrypt.sh: operator script to encrypt a user's
  data directory with fscrypt (run as root on host before container start);
  supports --list and --status subcommands

Key management note: the current implementation uses a pam_passphrase
protector. For unattended server boot, integrate a raw_key protector backed by
a secrets manager (Vault, AWS Secrets Manager, etc.) — see script comments.

SQLCipher (Option A) deferred: sqlite-vec virtual table compatibility with
SQLCipher's encrypted VFS needs investigation before committing to that path.
This commit is contained in:
pyr0ball 2026-05-13 18:35:17 -07:00
parent 8eef52a054
commit 1e066cf66c
3 changed files with 167 additions and 1 deletions

View file

@ -32,7 +32,11 @@ async def lifespan(app: FastAPI):
embed_model = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
logger.info("Pagepiper starting — embed model: %s, dims: %d", embed_model, VEC_DIMENSIONS)
if not CLOUD_MODE:
if CLOUD_MODE:
from app.startup import warn_if_unencrypted
from app.config import DATA_DIR
warn_if_unencrypted(str(DATA_DIR))
else:
# In cloud mode, per-user migration and vec schema check run on first request (deps.py).
apply_migrations(DB_PATH)
check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)

View file

@ -6,11 +6,53 @@ import logging
import os
import re
import sqlite3
import subprocess
import threading
logger = logging.getLogger("pagepiper")
def warn_if_unencrypted(data_dir: str) -> None:
    """Log a warning if cloud mode is running without fscrypt encryption.

    Ensures the ``users/`` subdirectory of *data_dir* exists, then asks
    ``fscrypt status`` whether that directory is encrypted.

    Non-fatal: every failure of the fscrypt probe is logged and swallowed so
    that startup is never blocked by this check.

    Args:
        data_dir: Base data directory; ``users/`` is resolved beneath it.
    """
    users_dir = os.path.join(data_dir, "users")
    os.makedirs(users_dir, exist_ok=True)

    if not _fscrypt_available():
        logger.warning(
            "SECURITY: fscrypt not found on this system. Cloud user data at %s is stored "
            "unencrypted. Install fscrypt and run scripts/setup_cloud_fscrypt.sh to enable "
            "encryption at rest.",
            users_dir,
        )
        return

    try:
        result = subprocess.run(
            ["fscrypt", "status", users_dir],
            capture_output=True, text=True, timeout=5,
        )
    except Exception as exc:
        # Best-effort boundary: the probe must never abort startup, but an
        # unverifiable encryption state is worth more than a DEBUG line.
        logger.warning(
            "SECURITY: fscrypt status check for %s failed (non-fatal): %s. "
            "Encryption at rest could not be verified.",
            users_dir,
            exc,
        )
        return

    # For an encrypted directory fscrypt exits 0 and prints
    # '"<path>" is encrypted with fscrypt.' (lower-case); for a plain directory
    # it exits non-zero. Match case-insensitively so the check does not depend
    # on the capitalisation of the status line.
    encrypted = result.returncode == 0 and "is encrypted" in result.stdout.lower()
    if not encrypted:
        # NOTE(review): the setup script referenced below encrypts
        # users/<user_id>, while this probe inspects users/ itself — confirm
        # which directory level is expected to carry the policy.
        logger.warning(
            "SECURITY: user data directory %s is not fscrypt-encrypted. "
            "Run: sudo scripts/setup_cloud_fscrypt.sh <user_id>",
            users_dir,
        )
def _fscrypt_available() -> bool:
    """Return True if the ``fscrypt`` executable can be launched.

    Runs ``fscrypt --version`` with a 2-second timeout. The exit status is not
    inspected; only the ability to start the process matters.

    Returns:
        False when the binary is missing, cannot be executed (any ``OSError``
        such as ``FileNotFoundError`` or ``PermissionError``), or does not
        finish within the timeout; True otherwise.
    """
    try:
        subprocess.run(["fscrypt", "--version"], capture_output=True, timeout=2)
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers FileNotFoundError plus exec failures like
        # PermissionError, which would otherwise escape the non-fatal
        # startup check that calls this helper.
        return False
    return True
def apply_migrations(db_path: str) -> None:
from scripts.db_migrate import migrate
migrate(db_path)

120
scripts/setup_cloud_fscrypt.sh Executable file
View file

@ -0,0 +1,120 @@
#!/usr/bin/env bash
# setup_cloud_fscrypt.sh — encrypt a cloud user's data directory with fscrypt.
#
# Run as root on the HOST (not inside the container) before first deployment.
# Requires: fscrypt >= 0.3, Linux kernel >= 4.1, ext4/f2fs filesystem.
#
# Usage:
# sudo ./scripts/setup_cloud_fscrypt.sh <user_id>
# sudo ./scripts/setup_cloud_fscrypt.sh --list # show all encrypted dirs
# sudo ./scripts/setup_cloud_fscrypt.sh --status <user_id>
#
# Environment:
# PAGEPIPER_DATA_DIR — base data directory (default: /devl/pagepiper-cloud-data)
#
# Key management:
# Keys are stored in the system protector backed by a passphrase or root keyring.
# For unattended unlock on server boot, use a raw_key protector derived from a
# secret in HashiCorp Vault or similar; see docs/encryption.md for details.
set -o errexit -o nounset -o pipefail  # strict mode: abort on errors, unset variables and failed pipeline stages
DATA_DIR=${PAGEPIPER_DATA_DIR:-/devl/pagepiper-cloud-data}  # base data directory, overridable via the environment
USERS_DIR="${DATA_DIR}/users"  # parent of the per-user data directories
_usage() {
    # Print every line of this script that begins with "# " (with that
    # two-character prefix removed) as the usage text, then exit with status 1.
    sed -n 's/^# //p' "$0"
    exit 1
}
_require_root() {
    # Guard: continue only when the effective UID is 0; otherwise report the
    # problem on stderr and terminate the script with status 1.
    [[ "$EUID" -eq 0 ]] && return 0
    echo "ERROR: this script must be run as root" >&2
    exit 1
}
_require_fscrypt() {
    # Guard: continue only when an fscrypt command can be resolved; otherwise
    # print an install hint on stderr and terminate the script with status 1.
    if command -v fscrypt >/dev/null 2>&1; then
        return 0
    fi
    echo "ERROR: fscrypt not found. Install with: apt-get install fscrypt" >&2
    exit 1
}
_check_fscrypt_setup() {
    # Make sure fscrypt metadata exists on the filesystem that holds DATA_DIR,
    # running "fscrypt setup" on its mount point when "fscrypt status" fails.
    local mnt

    # Ask df for the mount-point column only, so mount points containing
    # whitespace are not truncated by field splitting. The last output line is
    # the value; the first is the column header.
    mnt=$(df --output=target -- "$DATA_DIR" | tail -n 1)
    if [[ -z "$mnt" ]]; then
        echo "ERROR: could not determine the mount point of $DATA_DIR" >&2
        exit 1
    fi

    if ! fscrypt status "$mnt" &>/dev/null; then
        echo "Initialising fscrypt on $mnt..."
        fscrypt setup --quiet "$mnt"
        echo "fscrypt setup complete on $mnt"
    fi
}
_is_encrypted() {
    # Succeed when "fscrypt status" reports the directory given as $1 as
    # encrypted. fscrypt prints '"<dir>" is encrypted with fscrypt.' in lower
    # case, so the match is case-insensitive. The output is captured instead of
    # piped into "grep -q": under pipefail an early grep exit can SIGPIPE
    # fscrypt and turn a successful match into a pipeline failure.
    local status_out
    status_out=$(fscrypt status "$1" 2>/dev/null) || return 1
    [[ "${status_out,,}" == *"is encrypted"* ]]
}

cmd="${1:-}"
case "$cmd" in
    --list)
        # Show every direct child directory of USERS_DIR with its state.
        _require_root
        _require_fscrypt
        echo "Encrypted user directories under $USERS_DIR:"
        # NUL-delimited read keeps directory names with spaces or newlines intact.
        while IFS= read -r -d '' dir; do
            if _is_encrypted "$dir"; then
                echo "  [encrypted] $dir"
            else
                echo "  [plain]     $dir"
            fi
        done < <(find "$USERS_DIR" -maxdepth 1 -mindepth 1 -type d -print0 2>/dev/null)
        ;;
    --status)
        # Print the raw fscrypt status of one user's directory.
        _require_root
        _require_fscrypt
        user_id="${2:-}"
        [[ -z "$user_id" ]] && { echo "Usage: $0 --status <user_id>" >&2; exit 1; }
        user_dir="$USERS_DIR/$user_id"
        if [[ ! -d "$user_dir" ]]; then
            echo "Directory $user_dir does not exist"
            exit 1
        fi
        fscrypt status "$user_dir"
        ;;
    "" | -*)
        # No argument or an unknown option: show usage and exit non-zero.
        _usage
        ;;
    *)
        # Encrypt a user's directory
        user_id="$1"
        _require_root
        _require_fscrypt
        user_dir="$USERS_DIR/$user_id"
        if [[ ! -d "$user_dir" ]]; then
            echo "Creating user directory: $user_dir"
            mkdir -p "$user_dir"
        fi
        if _is_encrypted "$user_dir"; then
            echo "Directory $user_dir is already encrypted."
            exit 0
        fi
        # fscrypt only applies a policy to an EMPTY directory; it does not
        # convert existing files in place. Fail early with guidance instead of
        # prompting and then letting "fscrypt encrypt" abort.
        if [[ -n "$(ls -A "$user_dir")" ]]; then
            echo "ERROR: $user_dir is non-empty. fscrypt can only encrypt empty directories;" >&2
            echo "it does not encrypt existing files in place." >&2
            echo "To migrate: stop the container, move the existing data aside, re-run this" >&2
            echo "script on the empty directory, copy the data back, then securely delete" >&2
            echo "the plaintext copy." >&2
            exit 1
        fi
        _check_fscrypt_setup
        echo "Encrypting $user_dir..."
        fscrypt encrypt "$user_dir" --source=pam_passphrase --quiet
        echo "Encryption set up for user $user_id. Directory: $user_dir"
        echo ""
        echo "IMPORTANT: unlock the directory before starting the container:"
        echo "  fscrypt unlock $user_dir"
        ;;
esac