Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.
Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
  check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
  now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (db_path, vec_db_path, data_dir,
  watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
  migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
  add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
  endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
  endpoints get a consistent test UserCtx
59 lines
2 KiB
Python
59 lines
2 KiB
Python
# app/main.py
|
|
"""FastAPI application factory for pagepiper."""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI
|
|
|
|
from app.config import DB_PATH, VEC_DB_PATH, VEC_DIMENSIONS
|
|
from app.services.bm25_index import BM25Index
|
|
|
|
logger = logging.getLogger("pagepiper")

# Process-wide registry of BM25 indexes, one entry per user.
# Keys are user_id strings; single-user (local) mode uses "__local__".
# Entries are created lazily by _get_bm25_for().
_bm25_map: dict[str, BM25Index] = {}
|
|
|
|
|
|
def _get_bm25_for(user_id: str) -> BM25Index:
    """Return the BM25 index registered for *user_id*, creating it on first use.

    Args:
        user_id: Key into the module-level ``_bm25_map`` registry.

    Returns:
        The ``BM25Index`` stored under ``user_id``. When no entry exists yet,
        a fresh ``BM25Index()`` is constructed, stored, and returned, so
        repeated calls with the same id yield the same object.
    """
    # Fast path: an index was already registered for this user.
    if user_id in _bm25_map:
        return _bm25_map[user_id]

    # First request for this user: create the index and remember it.
    index = BM25Index()
    _bm25_map[user_id] = index
    return index
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work for the application, then hand control to the server.

    Logs the configured embedding model (from the ``PAGEPIPER_EMBED_MODEL``
    environment variable, defaulting to ``"nomic-embed-text"``) together with
    ``VEC_DIMENSIONS``. When ``CLOUD_MODE`` is false it additionally applies
    migrations to ``DB_PATH``, runs the vector-schema check against
    ``VEC_DB_PATH``, and marks the local user's BM25 index dirty.

    Nothing runs after the ``yield``; there is no shutdown step here.

    Args:
        app: The FastAPI application instance (not used in the body).
    """
    # Imported inside the function rather than at module top level.
    from app.cloud_session import CLOUD_MODE
    from app.config import LOCAL_USER_ID
    from app.startup import apply_migrations, check_and_rebuild_vec_schema

    logger.info(
        "Pagepiper starting — embed model: %s, dims: %d",
        os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text"),
        VEC_DIMENSIONS,
    )

    # Cloud mode skips this branch: there, per-user migration and the vec
    # schema check run on first request (deps.py).
    local_mode = not CLOUD_MODE
    if local_mode:
        apply_migrations(DB_PATH)
        check_and_rebuild_vec_schema(VEC_DB_PATH, VEC_DIMENSIONS, DB_PATH)
        local_index = _get_bm25_for(LOCAL_USER_ID)
        local_index.mark_dirty()

    yield
|
|
|
|
|
|
app = FastAPI(title="Pagepiper", lifespan=lifespan)

# Register routers. The router modules are imported here, below the app
# definition, which is why each import carries an E402 suppression.
from app.api.library import router as library_router  # noqa: E402
from app.api.ingest import router as ingest_router  # noqa: E402
from app.api.search import router as search_router  # noqa: E402
from app.api.chat import router as chat_router  # noqa: E402
from app.api.feedback import router as feedback_router  # noqa: E402
from app.api.feedback_attach import router as feedback_attach_router  # noqa: E402

# Routers mounted without an extra prefix, in registration order.
for _router in (library_router, ingest_router, search_router, chat_router):
    app.include_router(_router)

# Both feedback routers share the same URL prefix.
for _router in (feedback_router, feedback_attach_router):
    app.include_router(_router, prefix="/api/v1/feedback")

# Do not leave the loop variable behind as a module attribute.
del _router
|