Implements Option B (fscrypt) from the issue design: OS-level filesystem encryption for per-user data directories on the cloud host.

- app/startup.py: warn_if_unencrypted() checks for fscrypt at startup in cloud mode and logs a SECURITY warning if the users/ directory is not encrypted — this catches misconfigured deployments before any data is stored.
- app/main.py: calls warn_if_unencrypted() during lifespan in cloud mode.
- scripts/setup_cloud_fscrypt.sh: operator script to encrypt a user's data directory with fscrypt (run as root on the host before container start); supports the --list and --status subcommands.

Key management note: the current implementation uses the pam_passphrase protector. For unattended server boot, integrate a raw_key protector from a secrets manager (Vault, AWS Secrets Manager, etc.) — see the script comments.

SQLCipher (Option A) is deferred: sqlite-vec virtual table compatibility with SQLCipher's encrypted VFS needs investigation before committing to that path.
63 lines
2.1 KiB
Python
63 lines
2.1 KiB
Python
# app/main.py
|
|
"""FastAPI application factory for pagepiper."""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI
|
|
|
|
from app.config import DB_PATH, VEC_DB_PATH, VEC_DIMENSIONS
|
|
from app.services.bm25_index import BM25Index
|
|
|
|
# Module-level logger; the name "pagepiper" is fixed rather than derived from __name__.
logger = logging.getLogger("pagepiper")

# Per-user BM25 registry — keyed by user_id; "__local__" for single-user mode.
# Starts empty; entries are added on first lookup by _get_bm25_for().
_bm25_map: dict[str, BM25Index] = {}
|
|
|
|
|
|
def _get_bm25_for(user_id: str) -> BM25Index:
    """Return the BM25 index registered for *user_id*.

    A new ``BM25Index`` is created and stored in ``_bm25_map`` the first time
    a given ``user_id`` is requested; later calls return that same object.
    """
    try:
        return _bm25_map[user_id]
    except KeyError:
        # First request for this user: register a fresh index and hand it back.
        index = BM25Index()
        _bm25_map[user_id] = index
        return index
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run pagepiper's startup steps, then yield control to the running app.

    Outside cloud mode this calls ``apply_migrations`` and
    ``check_and_rebuild_vec_schema`` on the configured database paths and
    calls ``mark_dirty()`` on the local user's BM25 index.  In cloud mode it
    only calls ``warn_if_unencrypted`` on the data directory.  Nothing runs
    after the ``yield``, so there is no shutdown work.
    """
    # Imported inside the function, in this order, exactly as the module
    # originally did — presumably to defer loading until startup; TODO confirm.
    from app.cloud_session import CLOUD_MODE
    from app.config import LOCAL_USER_ID
    from app.startup import apply_migrations, check_and_rebuild_vec_schema

    model_name = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
    logger.info("Pagepiper starting — embed model: %s, dims: %d", model_name, VEC_DIMENSIONS)

    if not CLOUD_MODE:
        # Single-user mode: prepare the one local database at startup.
        apply_migrations(DB_PATH)
        check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
        _get_bm25_for(LOCAL_USER_ID).mark_dirty()
    else:
        # In cloud mode, per-user migration and vec schema check run on first
        # request (deps.py); startup only checks the data directory.
        from app.startup import warn_if_unencrypted
        from app.config import DATA_DIR

        warn_if_unencrypted(str(DATA_DIR))

    yield
|
|
|
|
|
|
# The FastAPI application instance; startup work is delegated to lifespan().
app = FastAPI(title="Pagepiper", lifespan=lifespan)

# Register routers
# The router modules are imported here, after `app` is created, rather than at
# the top of the file (hence the E402 suppressions) — presumably to avoid a
# circular import; TODO confirm.
from app.api.library import router as library_router  # noqa: E402
from app.api.ingest import router as ingest_router  # noqa: E402
from app.api.search import router as search_router  # noqa: E402
from app.api.chat import router as chat_router  # noqa: E402
from app.api.feedback import router as feedback_router  # noqa: E402
from app.api.feedback_attach import router as feedback_attach_router  # noqa: E402

# These four routers are included without a prefix argument here.
app.include_router(library_router)
app.include_router(ingest_router)
app.include_router(search_router)
app.include_router(chat_router)
# Both feedback routers are mounted under the same /api/v1/feedback prefix.
app.include_router(feedback_router, prefix="/api/v1/feedback")
app.include_router(feedback_attach_router, prefix="/api/v1/feedback")
|