feat: encryption at rest infrastructure for cloud user data (closes #5)
Implements Option B (fscrypt) from the issue design: OS-level filesystem encryption for per-user data directories on the cloud host. - app/startup.py: warn_if_unencrypted() checks for fscrypt at startup in cloud mode and logs a SECURITY warning if the users/ directory is not encrypted — catches misconfigured deployments before any data is stored - app/main.py: call warn_if_unencrypted() during lifespan in cloud mode - scripts/setup_cloud_fscrypt.sh: operator script to encrypt a user's data directory with fscrypt (run as root on host before container start); supports --list and --status subcommands Key management note: current implementation uses pam_passphrase protector. For unattended server boot, integrate a raw_key protector from a secrets manager (Vault, AWS Secrets Manager, etc.) — see script comments. SQLCipher (Option A) deferred: sqlite-vec virtual table compatibility with SQLCipher's encrypted VFS needs investigation before committing to that path.
This commit is contained in:
parent
8eef52a054
commit
1e066cf66c
3 changed files with 167 additions and 1 deletions
|
|
@ -32,7 +32,11 @@ async def lifespan(app: FastAPI):
|
|||
embed_model = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
|
||||
logger.info("Pagepiper starting — embed model: %s, dims: %d", embed_model, VEC_DIMENSIONS)
|
||||
|
||||
if not CLOUD_MODE:
|
||||
if CLOUD_MODE:
|
||||
from app.startup import warn_if_unencrypted
|
||||
from app.config import DATA_DIR
|
||||
warn_if_unencrypted(str(DATA_DIR))
|
||||
else:
|
||||
# In cloud mode, per-user migration and vec schema check run on first request (deps.py).
|
||||
apply_migrations(DB_PATH)
|
||||
check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
|
||||
|
|
|
|||
|
|
@ -6,11 +6,53 @@ import logging
|
|||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger("pagepiper")
|
||||
|
||||
|
||||
def warn_if_unencrypted(data_dir: str) -> None:
    """Log a warning if cloud mode is running without fscrypt encryption.

    Checks whether the ``users/`` subdirectory of *data_dir* is
    fscrypt-encrypted, creating that directory first if it is missing.
    The fscrypt probing itself is non-fatal: a missing binary, an
    unencrypted directory, or a failed status check only produce log output.

    Args:
        data_dir: Base data directory; the check targets ``<data_dir>/users``.
    """
    users_dir = os.path.join(data_dir, "users")
    os.makedirs(users_dir, exist_ok=True)

    if not _fscrypt_available():
        logger.warning(
            "SECURITY: fscrypt not found on this system. Cloud user data at %s is stored "
            "unencrypted. Install fscrypt and run scripts/setup_cloud_fscrypt.sh to enable "
            "encryption at rest.",
            users_dir,
        )
        return

    try:
        result = subprocess.run(
            ["fscrypt", "status", users_dir],
            capture_output=True, text=True, timeout=5,
        )
    except Exception as exc:
        # Deliberately broad: this is a best-effort startup probe and must not
        # abort the application. It is still logged at WARNING, because an
        # unverifiable encryption state is itself a security-relevant finding.
        logger.warning(
            "SECURITY: fscrypt status check for %s failed (non-fatal): %s",
            users_dir, exc,
        )
        return

    # fscrypt describes an encrypted path as '"<dir>" is encrypted with fscrypt.'
    # (lowercase) and exits non-zero for an unencrypted one, so rely on the
    # exit status plus a case-insensitive match instead of a capitalised
    # "Encrypted" substring.
    status_text = result.stdout.lower()
    is_encrypted = (
        result.returncode == 0
        and "encrypted" in status_text
        and "not encrypted" not in status_text
    )
    if not is_encrypted:
        # NOTE(review): this inspects users/ itself, while the remedy named in
        # the message operates on users/<user_id> — confirm which level is
        # meant to carry the encryption policy.
        logger.warning(
            "SECURITY: user data directory %s is not fscrypt-encrypted. "
            "Run: sudo scripts/setup_cloud_fscrypt.sh <user_id>",
            users_dir,
        )
|
||||
|
||||
|
||||
def _fscrypt_available() -> bool:
|
||||
try:
|
||||
subprocess.run(["fscrypt", "--version"], capture_output=True, timeout=2)
|
||||
return True
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return False
|
||||
|
||||
|
||||
def apply_migrations(db_path: str) -> None:
|
||||
from scripts.db_migrate import migrate
|
||||
migrate(db_path)
|
||||
|
|
|
|||
120
scripts/setup_cloud_fscrypt.sh
Executable file
120
scripts/setup_cloud_fscrypt.sh
Executable file
|
|
@ -0,0 +1,120 @@
|
|||
#!/usr/bin/env bash
|
||||
# setup_cloud_fscrypt.sh — encrypt a cloud user's data directory with fscrypt.
|
||||
#
|
||||
# Run as root on the HOST (not inside the container) before first deployment.
|
||||
# Requires: fscrypt >= 0.3, Linux kernel >= 4.1, ext4/f2fs filesystem.
|
||||
#
|
||||
# Usage:
|
||||
# sudo ./scripts/setup_cloud_fscrypt.sh <user_id>
|
||||
# sudo ./scripts/setup_cloud_fscrypt.sh --list # show all encrypted dirs
|
||||
# sudo ./scripts/setup_cloud_fscrypt.sh --status <user_id>
|
||||
#
|
||||
# Environment:
|
||||
# PAGEPIPER_DATA_DIR — base data directory (default: /devl/pagepiper-cloud-data)
|
||||
#
|
||||
# Key management:
|
||||
# Keys are stored in the system protector backed by a passphrase or root keyring.
|
||||
# For unattended unlock on server boot, use a raw_key protector derived from a
|
||||
# secret in HashiCorp Vault or similar; see docs/encryption.md for details.
|
||||
|
||||
set -o errexit -o nounset -o pipefail  # stop on errors, unset variables and failed pipeline stages

DATA_DIR="${PAGEPIPER_DATA_DIR:-/devl/pagepiper-cloud-data}"  # base data directory, overridable from the environment
USERS_DIR="${DATA_DIR}/users"  # parent of the per-user directories
|
||||
|
||||
_usage() {
    # Print this script's header comments as help text: every line that starts
    # with "# " is emitted with that two-character prefix removed. Always
    # terminates the script with status 1.
    sed -n 's/^# //p' "$0"
    exit 1
}
|
||||
|
||||
_require_root() {
    # Guard: return quietly when the effective UID is 0, otherwise report the
    # problem on stderr and terminate the script with status 1.
    [[ "$EUID" -eq 0 ]] && return 0
    echo "ERROR: this script must be run as root" >&2
    exit 1
}
|
||||
|
||||
_require_fscrypt() {
    # Guard: return quietly when the shell can resolve an `fscrypt` command,
    # otherwise print an install hint on stderr and terminate with status 1.
    command -v fscrypt >/dev/null 2>&1 && return 0
    echo "ERROR: fscrypt not found. Install with: apt-get install fscrypt" >&2
    exit 1
}
|
||||
|
||||
_check_fscrypt_setup() {
    # Make sure the filesystem holding $DATA_DIR is initialised for fscrypt:
    # find its mount point, and run `fscrypt setup` on it when
    # `fscrypt status <mountpoint>` fails.
    local mnt

    # Ask df for the mount-point column only. Picking the 6th
    # whitespace-separated field of `df -P` breaks as soon as the device name
    # or the mount point contains a space.
    mnt=$(df --output=target "$DATA_DIR" | tail -n 1)
    if [[ -z "$mnt" ]]; then
        echo "ERROR: could not determine the mount point of $DATA_DIR" >&2
        exit 1
    fi

    if ! fscrypt status "$mnt" &>/dev/null; then
        echo "Initialising fscrypt on $mnt..."
        fscrypt setup --quiet "$mnt"
        echo "fscrypt setup complete on $mnt"
    fi
}
|
||||
|
||||
_is_encrypted() {
    # Succeed when `fscrypt status` reports the given path as encrypted.
    # fscrypt words this as '"<dir>" is encrypted with fscrypt.' (lowercase)
    # and exits non-zero for unencrypted paths, so the exit status is checked
    # and the text match is case-insensitive. The output is captured instead
    # of being piped into `grep -q`, whose early exit can make the pipeline
    # fail under `pipefail`.
    local report
    report=$(fscrypt status "$1" 2>/dev/null) || return 1
    report="${report,,}"
    [[ "$report" == *encrypted* && "$report" != *"not encrypted"* ]]
}

_validate_user_id() {
    # Refuse ids that would resolve outside $USERS_DIR once appended as a path
    # component; this script runs as root, so that must never happen.
    case "$1" in
        */*|.|..)
            echo "ERROR: invalid user id: $1" >&2
            exit 1
            ;;
    esac
}

cmd="${1:-}"
case "$cmd" in
    --list)
        # Show every direct child directory of $USERS_DIR with its encryption state.
        _require_root
        _require_fscrypt
        echo "Encrypted user directories under $USERS_DIR:"
        if [[ ! -d "$USERS_DIR" ]]; then
            # Without this guard `find` fails and `set -e` ends the script
            # with no explanation.
            echo "  (none: $USERS_DIR does not exist)"
            exit 0
        fi
        find "$USERS_DIR" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | while IFS= read -r dir; do
            if _is_encrypted "$dir"; then
                echo "  [encrypted] $dir"
            else
                echo "  [plain]     $dir"
            fi
        done
        ;;
    --status)
        # Print the raw `fscrypt status` report for one user's directory.
        _require_root
        _require_fscrypt
        user_id="${2:-}"
        [[ -z "$user_id" ]] && { echo "Usage: $0 --status <user_id>" >&2; exit 1; }
        _validate_user_id "$user_id"
        user_dir="$USERS_DIR/$user_id"
        if [[ ! -d "$user_dir" ]]; then
            echo "Directory $user_dir does not exist"
            exit 1
        fi
        fscrypt status "$user_dir"
        ;;
    "")
        _usage
        ;;
    -*)
        # Any other dash-prefixed argument is an unknown option.
        _usage
        ;;
    *)
        # Encrypt a user's directory
        user_id="$1"
        _require_root
        _require_fscrypt
        _validate_user_id "$user_id"

        user_dir="$USERS_DIR/$user_id"
        if [[ ! -d "$user_dir" ]]; then
            echo "Creating user directory: $user_dir"
            mkdir -p "$user_dir"
        fi

        if _is_encrypted "$user_dir"; then
            echo "Directory $user_dir is already encrypted."
            exit 0
        fi

        # An encryption policy can only be applied to an EMPTY directory;
        # fscrypt does not convert existing files in place. Stop with
        # actionable guidance instead of prompting and then failing inside
        # `fscrypt encrypt`.
        if [[ -n "$(ls -A "$user_dir")" ]]; then
            echo "ERROR: $user_dir is non-empty; fscrypt can only encrypt an empty directory." >&2
            echo "Stop the container, move the existing contents aside (keep a backup)," >&2
            echo "re-run this script on the emptied directory, then copy the data back in." >&2
            exit 1
        fi

        _check_fscrypt_setup

        echo "Encrypting $user_dir..."
        fscrypt encrypt "$user_dir" --source=pam_passphrase --quiet
        echo "Encryption set up for user $user_id. Directory: $user_dir"
        echo ""
        echo "IMPORTANT: unlock the directory before starting the container:"
        echo "  fscrypt unlock $user_dir"
        ;;
esac
|
||||
Loading…
Reference in a new issue