pagepiper/app/main.py
pyr0ball 8eef52a054 feat: per-user database isolation for cloud instances (closes #4)
Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.

Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
  check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
  now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (db_path, vec_db_path, data_dir,
  watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
  migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
  add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
  endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
  endpoints get a consistent test UserCtx
2026-05-13 16:31:51 -07:00

59 lines
2 KiB
Python

# app/main.py
"""FastAPI application factory for pagepiper."""
from __future__ import annotations
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.config import DB_PATH, VEC_DB_PATH, VEC_DIMENSIONS
from app.services.bm25_index import BM25Index
# Module-wide logger, registered under the fixed name "pagepiper".
logger = logging.getLogger("pagepiper")
# Per-user BM25 registry — keyed by user_id; "__local__" for single-user mode.
# Starts empty; entries are added lazily by _get_bm25_for(). No code visible
# in this module ever removes an entry.
_bm25_map: dict[str, BM25Index] = {}
def _get_bm25_for(user_id: str) -> BM25Index:
    """Return the BM25 index registered for *user_id*, creating it on first use.

    The index is stored in the module-level ``_bm25_map`` so that later calls
    with the same ``user_id`` receive the same ``BM25Index`` instance.
    """
    try:
        return _bm25_map[user_id]
    except KeyError:
        # First lookup for this user: build a fresh index and remember it.
        fresh_index = BM25Index()
        _bm25_map[user_id] = fresh_index
        return fresh_index
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Perform startup work, then hand control to the running application.

    Logs the configured embed model and vector dimensions. When not in cloud
    mode it applies migrations to ``DB_PATH``, checks (and rebuilds if needed)
    the vector schema at ``VEC_DB_PATH``, and marks the local user's BM25
    index dirty. Nothing is executed after the ``yield``, so there is no
    shutdown step.
    """
    # Imported inside the function rather than at module top
    # (presumably to avoid import cycles — TODO confirm).
    from app.cloud_session import CLOUD_MODE
    from app.config import LOCAL_USER_ID
    from app.startup import apply_migrations, check_and_rebuild_vec_schema

    model_name = os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")
    logger.info("Pagepiper starting — embed model: %s, dims: %d", model_name, VEC_DIMENSIONS)

    running_locally = not CLOUD_MODE
    if running_locally:
        # In cloud mode, per-user migration and vec schema check run on
        # first request (deps.py), so they are only done here for local mode.
        apply_migrations(DB_PATH)
        check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
        local_index = _get_bm25_for(LOCAL_USER_ID)
        local_index.mark_dirty()

    yield
# The ASGI application object; `lifespan` above runs its startup work.
app = FastAPI(title="Pagepiper", lifespan=lifespan)
# Register routers
# These imports deliberately sit below the `app` definition, hence the
# E402 (module-level import not at top of file) suppressions.
# NOTE(review): presumably the router modules import from this module, so
# importing them earlier would be circular — confirm before moving them.
from app.api.library import router as library_router # noqa: E402
from app.api.ingest import router as ingest_router # noqa: E402
from app.api.search import router as search_router # noqa: E402
from app.api.chat import router as chat_router # noqa: E402
from app.api.feedback import router as feedback_router # noqa: E402
from app.api.feedback_attach import router as feedback_attach_router # noqa: E402
# These four routers are included without a prefix here.
app.include_router(library_router)
app.include_router(ingest_router)
app.include_router(search_router)
app.include_router(chat_router)
# Both feedback routers are mounted under the same /api/v1/feedback prefix.
app.include_router(feedback_router, prefix="/api/v1/feedback")
app.include_router(feedback_attach_router, prefix="/api/v1/feedback")