Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.
Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
  check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
  now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (db_path, vec_db_path, data_dir,
  watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
  migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
  add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
  endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
  endpoints get a consistent test UserCtx
81 lines
2.4 KiB
Python
81 lines
2.4 KiB
Python
# app/deps.py
|
|
"""FastAPI dependency providers."""
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Generator
|
|
|
|
from fastapi import Depends, Request
|
|
|
|
from app.config import DATA_DIR, LOCAL_USER_ID
|
|
from app.services.bm25_index import BM25Index
|
|
|
|
|
|
@dataclass
class UserCtx:
    """Bundle of per-user resources resolved for a single request.

    Carries the identity of the user together with the filesystem
    locations and the BM25 index that belong to that user, so endpoints
    can operate on the right data without resolving paths themselves.
    """

    # Identifier of the user this context belongs to.
    user_id: str
    # Path (as a string) of the user's main SQLite database file.
    db_path: str
    # Path (as a string) of the user's vector SQLite database file.
    vec_db_path: str
    # Root directory holding this user's data files.
    data_dir: Path
    # Directory associated with this user's books / watched files.
    watch_dir: Path
    # BM25 index instance associated with this user.
    bm25: BM25Index
|
|
|
|
|
|
# User IDs whose per-user startup has completed (or is currently running)
# in this process. Consulted by _run_user_startup to skip repeated work.
_user_startup_done: set[str] = set()


def _run_user_startup(user_id: str, user_dir: Path) -> None:
    """Run migrations and the vec schema check once per process per user.

    Args:
        user_id: Identifier used as the key in ``_user_startup_done``.
        user_dir: Directory containing the user's ``pagepiper.db`` and
            ``pagepiper_vecs.db`` files.

    Raises:
        Whatever ``apply_migrations`` or ``check_and_rebuild_vec_schema``
        raise. On failure the user is un-marked so that the next call
        retries the startup instead of silently skipping it.
    """
    if user_id in _user_startup_done:
        return

    # Mark the user up front so later calls for the same user return early
    # while this one is still working; the mark is rolled back on failure.
    _user_startup_done.add(user_id)
    try:
        # Imported here rather than at module top, as in the original code
        # (presumably to avoid import cycles -- confirm before hoisting).
        from app.config import VEC_DIMENSIONS
        from app.startup import apply_migrations, check_and_rebuild_vec_schema

        main_db = str(user_dir / "pagepiper.db")
        apply_migrations(main_db)
        check_and_rebuild_vec_schema(
            str(user_dir / "pagepiper_vecs.db"), VEC_DIMENSIONS, main_db
        )
    except BaseException:
        # A failed startup must not count as done, otherwise every later
        # request for this user would run against an unprepared database.
        _user_startup_done.discard(user_id)
        raise
|
|
|
|
|
|
def get_user_ctx(request: Request) -> UserCtx:
    """Build the UserCtx (data directory, DB paths, BM25 index) for a request.

    When ``CLOUD_MODE`` is falsy the context points at ``DATA_DIR`` /
    ``WATCH_DIR`` under ``LOCAL_USER_ID``. Otherwise the user is resolved
    from the request, the per-user startup is run for that user's
    directory, and the user's ``books`` directory is created if missing.

    Args:
        request: The incoming request; only used in cloud mode, where it
            is handed to ``resolve_authenticated_user``.

    Returns:
        A ``UserCtx`` populated for the resolved user.
    """
    # Function-level imports kept in their original order.
    import app.main as _main
    from app.cloud_session import CLOUD_MODE

    if not CLOUD_MODE:
        from app.config import WATCH_DIR

        uid, root, books = LOCAL_USER_ID, DATA_DIR, WATCH_DIR
    else:
        from app.cloud_session import resolve_authenticated_user
        from app.config import user_data_dir

        uid = resolve_authenticated_user(request)
        root = user_data_dir(uid)
        _run_user_startup(uid, root)
        books = root / "books"
        books.mkdir(parents=True, exist_ok=True)

    main_db = root / "pagepiper.db"
    vec_db = root / "pagepiper_vecs.db"
    return UserCtx(
        user_id=uid,
        db_path=str(main_db),
        vec_db_path=str(vec_db),
        data_dir=root,
        watch_dir=books,
        bm25=_main._get_bm25_for(uid),
    )
|
|
|
|
|
|
def get_db(ctx: UserCtx = Depends(get_user_ctx)) -> Generator[sqlite3.Connection, None, None]:
    """Yield a SQLite connection to the current user's main database.

    The connection is opened on ``ctx.db_path`` with
    ``check_same_thread=False``, has foreign keys enabled, uses WAL journal
    mode, and returns rows as ``sqlite3.Row``. It is closed when the
    generator is finalized.

    Args:
        ctx: Per-user context supplying ``db_path``.

    Yields:
        The configured ``sqlite3.Connection``.
    """
    conn = sqlite3.connect(ctx.db_path, check_same_thread=False)
    try:
        # Configuration happens inside the try block so that a failing
        # PRAGMA (e.g. a locked database) still closes the connection.
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute("PRAGMA journal_mode = WAL")
        conn.row_factory = sqlite3.Row
        yield conn
    finally:
        conn.close()
|