pagepiper/app/deps.py
pyr0ball 8eef52a054 feat: per-user database isolation for cloud instances (closes #4)
Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.

Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
  check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
  now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (user_id, db_path, vec_db_path, data_dir,
  watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
  migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
  add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
  endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
  endpoints get a consistent test UserCtx
2026-05-13 16:31:51 -07:00

81 lines
2.4 KiB
Python

# app/deps.py
"""FastAPI dependency providers."""
from __future__ import annotations
import sqlite3
from dataclasses import dataclass
from pathlib import Path
from typing import Generator
from fastapi import Depends, Request
from app.config import DATA_DIR, LOCAL_USER_ID
from app.services.bm25_index import BM25Index
@dataclass
class UserCtx:
    """Bundle of per-user storage locations and search index for one request.

    Instances are produced by ``get_user_ctx``; endpoints receive one so that
    database paths and the BM25 index all point at the same user.
    """

    # Identifier of the user this context belongs to.
    user_id: str
    # Filesystem path (as a string) of the user's ``pagepiper.db``.
    db_path: str
    # Filesystem path (as a string) of the user's ``pagepiper_vecs.db``.
    vec_db_path: str
    # Directory that contains both database files.
    data_dir: Path
    # Directory for the user's books: ``data_dir / "books"`` in cloud mode,
    # the configured ``WATCH_DIR`` otherwise.
    watch_dir: Path
    # BM25 index instance looked up for ``user_id``.
    bm25: BM25Index
# user_ids whose per-user startup has been claimed (and not rolled back) in
# this process.
_user_startup_done: set[str] = set()


def _run_user_startup(user_id: str, user_dir: Path) -> None:
    """Run migrations and the vec schema check once per process per user.

    Args:
        user_id: Key under which completion is remembered.
        user_dir: Directory holding this user's ``pagepiper.db`` and
            ``pagepiper_vecs.db``.

    Raises:
        Any exception raised by ``apply_migrations`` or
        ``check_and_rebuild_vec_schema`` is propagated unchanged. When that
        happens the user is un-marked again, so the next call retries instead
        of treating a never-migrated database as ready.
    """
    if user_id in _user_startup_done:
        return

    # Function-scope imports, resolved before the user is marked so that an
    # import failure does not leave the user flagged as initialized.
    from app.config import VEC_DIMENSIONS
    from app.startup import apply_migrations, check_and_rebuild_vec_schema

    # Claim the slot before doing the work: an overlapping call for the same
    # user returns early rather than starting the same work a second time.
    # NOTE(review): such a call does not wait for this one to finish.
    _user_startup_done.add(user_id)

    main_db = str(user_dir / "pagepiper.db")
    try:
        apply_migrations(main_db)
        check_and_rebuild_vec_schema(
            str(user_dir / "pagepiper_vecs.db"), VEC_DIMENSIONS, main_db
        )
    except BaseException:
        # Release the claim on any failure (always re-raised) so startup is
        # attempted again on the next request for this user.
        _user_startup_done.discard(user_id)
        raise
def get_user_ctx(request: Request) -> UserCtx:
    """Build the ``UserCtx`` (data dir, DB paths, BM25 index) for a request.

    In cloud mode the user is resolved from the request, their per-user
    startup is run if it has not been already, and their ``books`` directory
    is created on demand. Otherwise the shared local directories and
    ``LOCAL_USER_ID`` are used.
    """
    # Function-scope imports, kept in their original order.
    # NOTE(review): presumably deferred to avoid an import cycle with
    # app.main — confirm.
    import app.main as _main
    from app.cloud_session import CLOUD_MODE

    if not CLOUD_MODE:
        from app.config import WATCH_DIR

        uid = LOCAL_USER_ID
        base_dir = DATA_DIR
        books_dir = WATCH_DIR
    else:
        from app.cloud_session import resolve_authenticated_user
        from app.config import user_data_dir

        uid = resolve_authenticated_user(request)
        base_dir = user_data_dir(uid)
        _run_user_startup(uid, base_dir)
        books_dir = base_dir / "books"
        books_dir.mkdir(parents=True, exist_ok=True)

    main_db_file = base_dir / "pagepiper.db"
    vec_db_file = base_dir / "pagepiper_vecs.db"
    return UserCtx(
        user_id=uid,
        db_path=str(main_db_file),
        vec_db_path=str(vec_db_file),
        data_dir=base_dir,
        watch_dir=books_dir,
        bm25=_main._get_bm25_for(uid),
    )
def get_db(ctx: UserCtx = Depends(get_user_ctx)) -> Generator[sqlite3.Connection, None, None]:
    """Yield a SQLite connection to the current user's main database.

    The connection is opened on ``ctx.db_path`` with foreign keys enabled,
    WAL journaling requested, and ``sqlite3.Row`` as the row factory. It is
    closed when the generator is finalized, including when connection setup
    itself fails.

    Args:
        ctx: Per-user context supplying ``db_path``.

    Yields:
        An open ``sqlite3.Connection``.
    """
    # NOTE(review): check_same_thread=False presumably because the connection
    # may be used on a different thread than the one that opened it — confirm.
    conn = sqlite3.connect(ctx.db_path, check_same_thread=False)
    try:
        # Setup lives inside the try so that a failing PRAGMA does not leave
        # the freshly opened connection unclosed.
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute("PRAGMA journal_mode = WAL")
        conn.row_factory = sqlite3.Row
        yield conn
    finally:
        conn.close()