pagepiper/app/main.py
pyr0ball 1e066cf66c feat: encryption at rest infrastructure for cloud user data (closes #5)
Implements Option B (fscrypt) from the issue design: OS-level filesystem
encryption for per-user data directories on the cloud host.

- app/startup.py: warn_if_unencrypted() checks for fscrypt at startup in
  cloud mode and logs a SECURITY warning if the users/ directory is not
  encrypted — catches misconfigured deployments before any data is stored
- app/main.py: call warn_if_unencrypted() during lifespan in cloud mode
- scripts/setup_cloud_fscrypt.sh: operator script to encrypt a user's
  data directory with fscrypt (run as root on host before container start);
  supports --list and --status subcommands

Key management note: current implementation uses pam_passphrase protector.
For unattended server boot, integrate a raw_key protector from a secrets
manager (Vault, AWS Secrets Manager, etc.) — see script comments.

SQLCipher (Option A) deferred: sqlite-vec virtual table compatibility with
SQLCipher's encrypted VFS needs investigation before committing to that path.
2026-05-13 18:35:17 -07:00

63 lines
2.1 KiB
Python

# app/main.py
"""FastAPI application factory for pagepiper."""
from __future__ import annotations
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.config import DB_PATH, VEC_DB_PATH, VEC_DIMENSIONS
from app.services.bm25_index import BM25Index
# Module-wide logger; uses the fixed name "pagepiper" rather than __name__.
logger = logging.getLogger("pagepiper")
# Per-user BM25 registry — keyed by user_id; "__local__" for single-user mode
# Entries are created on first lookup by _get_bm25_for().
_bm25_map: dict[str, BM25Index] = {}
def _get_bm25_for(user_id: str) -> BM25Index:
    """Return the BM25 index registered for *user_id*, creating it on first use.

    The index is stored in the module-level ``_bm25_map`` registry, so later
    calls with the same ``user_id`` return the same object.
    """
    # Fast path: an index was already registered for this user.
    if user_id in _bm25_map:
        return _bm25_map[user_id]

    # First lookup for this user: build an index and remember it.
    fresh_index = BM25Index()
    _bm25_map[user_id] = fresh_index
    return fresh_index
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run pagepiper's startup work, then yield control to the running app.

    Logs the configured embedding model and vector dimensions, then branches
    on ``CLOUD_MODE``:

    * local mode: applies migrations to ``DB_PATH``, runs the vector schema
      check, and marks the ``LOCAL_USER_ID`` BM25 index dirty;
    * cloud mode: only calls ``warn_if_unencrypted`` on ``DATA_DIR``.

    Nothing runs after the ``yield``, so there is no shutdown work here.
    """
    # These names are imported at call time, not at module import time.
    from app.cloud_session import CLOUD_MODE
    from app.config import LOCAL_USER_ID
    from app.startup import apply_migrations, check_and_rebuild_vec_schema

    model_name = os.getenv("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
    logger.info("Pagepiper starting — embed model: %s, dims: %d", model_name, VEC_DIMENSIONS)

    if not CLOUD_MODE:
        # Single-user mode: prepare the one local database up front.
        apply_migrations(DB_PATH)
        check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
        local_index = _get_bm25_for(LOCAL_USER_ID)
        local_index.mark_dirty()
    else:
        # In cloud mode, per-user migration and vec schema check run on first request (deps.py).
        from app.startup import warn_if_unencrypted
        from app.config import DATA_DIR

        warn_if_unencrypted(str(DATA_DIR))

    yield
# Module-level application object; the lifespan() context manager defined
# above in this file is passed in as its lifespan handler.
app = FastAPI(title="Pagepiper", lifespan=lifespan)
# Register routers
# NOTE(review): these imports deliberately sit below the `app` assignment
# (hence the E402 suppressions) — presumably to avoid circular imports with
# the router modules; confirm before hoisting them to the top of the file.
from app.api.library import router as library_router # noqa: E402
from app.api.ingest import router as ingest_router # noqa: E402
from app.api.search import router as search_router # noqa: E402
from app.api.chat import router as chat_router # noqa: E402
from app.api.feedback import router as feedback_router # noqa: E402
from app.api.feedback_attach import router as feedback_attach_router # noqa: E402
# These four routers are included without a prefix argument here.
app.include_router(library_router)
app.include_router(ingest_router)
app.include_router(search_router)
app.include_router(chat_router)
# Both feedback routers are mounted under the same /api/v1/feedback prefix.
app.include_router(feedback_router, prefix="/api/v1/feedback")
app.include_router(feedback_attach_router, prefix="/api/v1/feedback")