feat: encryption at rest infrastructure for cloud user data (closes #5)
Implements Option B (fscrypt) from the issue design: OS-level filesystem encryption for per-user data directories on the cloud host. - app/startup.py: warn_if_unencrypted() checks for fscrypt at startup in cloud mode and logs a SECURITY warning if the users/ directory is not encrypted — catches misconfigured deployments before any data is stored - app/main.py: call warn_if_unencrypted() during lifespan in cloud mode - scripts/setup_cloud_fscrypt.sh: operator script to encrypt a user's data directory with fscrypt (run as root on host before container start); supports --list and --status subcommands Key management note: current implementation uses pam_passphrase protector. For unattended server boot, integrate a raw_key protector from a secrets manager (Vault, AWS Secrets Manager, etc.) — see script comments. SQLCipher (Option A) deferred: sqlite-vec virtual table compatibility with SQLCipher's encrypted VFS needs investigation before committing to that path.
This commit is contained in:
parent
8eef52a054
commit
1e066cf66c
3 changed files with 167 additions and 1 deletions
|
|
@ -32,7 +32,11 @@ async def lifespan(app: FastAPI):
|
||||||
embed_model = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
|
embed_model = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
|
||||||
logger.info("Pagepiper starting — embed model: %s, dims: %d", embed_model, VEC_DIMENSIONS)
|
logger.info("Pagepiper starting — embed model: %s, dims: %d", embed_model, VEC_DIMENSIONS)
|
||||||
|
|
||||||
if not CLOUD_MODE:
|
if CLOUD_MODE:
|
||||||
|
from app.startup import warn_if_unencrypted
|
||||||
|
from app.config import DATA_DIR
|
||||||
|
warn_if_unencrypted(str(DATA_DIR))
|
||||||
|
else:
|
||||||
# In cloud mode, per-user migration and vec schema check run on first request (deps.py).
|
# In cloud mode, per-user migration and vec schema check run on first request (deps.py).
|
||||||
apply_migrations(DB_PATH)
|
apply_migrations(DB_PATH)
|
||||||
check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
|
check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
|
||||||
|
|
|
||||||
|
|
@ -6,11 +6,53 @@ import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
logger = logging.getLogger("pagepiper")
|
logger = logging.getLogger("pagepiper")
|
||||||
|
|
||||||
|
|
||||||
|
def warn_if_unencrypted(data_dir: str) -> None:
|
||||||
|
"""Log a warning if cloud mode is running without fscrypt encryption.
|
||||||
|
|
||||||
|
Checks whether the users/ subdirectory of data_dir is fscrypt-encrypted.
|
||||||
|
Non-fatal: warns but does not block startup.
|
||||||
|
"""
|
||||||
|
users_dir = os.path.join(data_dir, "users")
|
||||||
|
os.makedirs(users_dir, exist_ok=True)
|
||||||
|
|
||||||
|
if not _fscrypt_available():
|
||||||
|
logger.warning(
|
||||||
|
"SECURITY: fscrypt not found on this system. Cloud user data at %s is stored "
|
||||||
|
"unencrypted. Install fscrypt and run scripts/setup_cloud_fscrypt.sh to enable "
|
||||||
|
"encryption at rest.",
|
||||||
|
users_dir,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["fscrypt", "status", users_dir],
|
||||||
|
capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
if "Encrypted" not in result.stdout:
|
||||||
|
logger.warning(
|
||||||
|
"SECURITY: user data directory %s is not fscrypt-encrypted. "
|
||||||
|
"Run: sudo scripts/setup_cloud_fscrypt.sh <user_id>",
|
||||||
|
users_dir,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("fscrypt status check failed (non-fatal): %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def _fscrypt_available() -> bool:
|
||||||
|
try:
|
||||||
|
subprocess.run(["fscrypt", "--version"], capture_output=True, timeout=2)
|
||||||
|
return True
|
||||||
|
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def apply_migrations(db_path: str) -> None:
|
def apply_migrations(db_path: str) -> None:
|
||||||
from scripts.db_migrate import migrate
|
from scripts.db_migrate import migrate
|
||||||
migrate(db_path)
|
migrate(db_path)
|
||||||
|
|
|
||||||
120
scripts/setup_cloud_fscrypt.sh
Executable file
120
scripts/setup_cloud_fscrypt.sh
Executable file
|
|
@ -0,0 +1,120 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# setup_cloud_fscrypt.sh — encrypt a cloud user's data directory with fscrypt.
|
||||||
|
#
|
||||||
|
# Run as root on the HOST (not inside the container) before first deployment.
|
||||||
|
# Requires: fscrypt >= 0.3, Linux kernel >= 4.1, ext4/f2fs filesystem.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# sudo ./scripts/setup_cloud_fscrypt.sh <user_id>
|
||||||
|
# sudo ./scripts/setup_cloud_fscrypt.sh --list # show all encrypted dirs
|
||||||
|
# sudo ./scripts/setup_cloud_fscrypt.sh --status <user_id>
|
||||||
|
#
|
||||||
|
# Environment:
|
||||||
|
# PAGEPIPER_DATA_DIR — base data directory (default: /devl/pagepiper-cloud-data)
|
||||||
|
#
|
||||||
|
# Key management:
|
||||||
|
# Keys are stored in the system protector backed by a passphrase or root keyring.
|
||||||
|
# For unattended unlock on server boot, use a raw_key protector derived from a
|
||||||
|
# secret in HashiCorp Vault or similar; see docs/encryption.md for details.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
DATA_DIR="${PAGEPIPER_DATA_DIR:-/devl/pagepiper-cloud-data}"
|
||||||
|
USERS_DIR="$DATA_DIR/users"
|
||||||
|
|
||||||
|
_usage() {
|
||||||
|
grep '^# ' "$0" | cut -c3-
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
_require_root() {
|
||||||
|
if [[ "$EUID" -ne 0 ]]; then
|
||||||
|
echo "ERROR: this script must be run as root" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
_require_fscrypt() {
|
||||||
|
if ! command -v fscrypt &>/dev/null; then
|
||||||
|
echo "ERROR: fscrypt not found. Install with: apt-get install fscrypt" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
_check_fscrypt_setup() {
|
||||||
|
local mnt
|
||||||
|
mnt=$(df -P "$DATA_DIR" | tail -1 | awk '{print $6}')
|
||||||
|
if ! fscrypt status "$mnt" &>/dev/null; then
|
||||||
|
echo "Initialising fscrypt on $mnt..."
|
||||||
|
fscrypt setup --quiet "$mnt"
|
||||||
|
echo "fscrypt setup complete on $mnt"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd="${1:-}"
|
||||||
|
case "$cmd" in
|
||||||
|
--list)
|
||||||
|
_require_root
|
||||||
|
_require_fscrypt
|
||||||
|
echo "Encrypted user directories under $USERS_DIR:"
|
||||||
|
find "$USERS_DIR" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | while read -r dir; do
|
||||||
|
if fscrypt status "$dir" 2>/dev/null | grep -q "Encrypted"; then
|
||||||
|
echo " [encrypted] $dir"
|
||||||
|
else
|
||||||
|
echo " [plain] $dir"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
;;
|
||||||
|
--status)
|
||||||
|
_require_root
|
||||||
|
_require_fscrypt
|
||||||
|
user_id="${2:-}"
|
||||||
|
[[ -z "$user_id" ]] && { echo "Usage: $0 --status <user_id>" >&2; exit 1; }
|
||||||
|
user_dir="$USERS_DIR/$user_id"
|
||||||
|
if [[ ! -d "$user_dir" ]]; then
|
||||||
|
echo "Directory $user_dir does not exist"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fscrypt status "$user_dir"
|
||||||
|
;;
|
||||||
|
"")
|
||||||
|
_usage
|
||||||
|
;;
|
||||||
|
-*)
|
||||||
|
_usage
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
# Encrypt a user's directory
|
||||||
|
user_id="$1"
|
||||||
|
_require_root
|
||||||
|
_require_fscrypt
|
||||||
|
|
||||||
|
user_dir="$USERS_DIR/$user_id"
|
||||||
|
if [[ ! -d "$user_dir" ]]; then
|
||||||
|
echo "Creating user directory: $user_dir"
|
||||||
|
mkdir -p "$user_dir"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if fscrypt status "$user_dir" 2>/dev/null | grep -q "Encrypted"; then
|
||||||
|
echo "Directory $user_dir is already encrypted."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Warn if directory contains existing data — fscrypt encrypt migrates in place
|
||||||
|
if [[ -n "$(ls -A "$user_dir")" ]]; then
|
||||||
|
echo "WARNING: $user_dir is non-empty. fscrypt will encrypt files in place."
|
||||||
|
echo "Ensure the container is stopped and you have a backup before continuing."
|
||||||
|
read -rp "Continue? [y/N] " confirm
|
||||||
|
[[ "$confirm" =~ ^[Yy]$ ]] || exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
_check_fscrypt_setup
|
||||||
|
|
||||||
|
echo "Encrypting $user_dir..."
|
||||||
|
fscrypt encrypt "$user_dir" --source=pam_passphrase --quiet
|
||||||
|
echo "Encryption set up for user $user_id. Directory: $user_dir"
|
||||||
|
echo ""
|
||||||
|
echo "IMPORTANT: unlock the directory before starting the container:"
|
||||||
|
echo " fscrypt unlock $user_dir"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
Loading…
Reference in a new issue