Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.
Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (db_path, vec_db_path, data_dir,
watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
endpoints get a consistent test UserCtx
50 lines
1.1 KiB
Python
50 lines
1.1 KiB
Python
# app/api/search.py
|
|
"""
|
|
BM25 keyword search across the document library.
|
|
|
|
MIT — no tier gate. No Ollama required.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
from fastapi import APIRouter, Depends
|
|
from pydantic import BaseModel, Field
|
|
|
|
from app.deps import UserCtx, get_user_ctx
|
|
|
|
logger = logging.getLogger(__name__)
|
|
router = APIRouter(prefix="/api/search", tags=["search"])
|
|
|
|
|
|
class SearchRequest(BaseModel):
    """Request body accepted by the keyword-search endpoint.

    Attributes:
        query: Free-text search string handed to the BM25 index.
        top_k: Maximum number of hits requested; validated to lie in 1..50
            and defaulting to 10 when omitted.
        doc_ids: Optional list of document ids forwarded to the index query.
            Presumably restricts the search to those documents (``None``
            meaning no restriction) -- confirm against the BM25 index.
    """

    query: str
    # Bounded so a single request cannot ask for an arbitrarily long result list.
    top_k: int = Field(10, ge=1, le=50)
    doc_ids: list[str] | None = None
|
|
|
|
|
|
class SearchResult(BaseModel):
    """One hit returned by the keyword-search endpoint.

    Attributes:
        chunk_id: Identifier of the matching chunk.
        doc_id: Identifier of the document the chunk belongs to.
        page_number: Page number reported by the index for the chunk.
        text_snippet: Leading excerpt of the chunk text (the endpoint
            truncates it to 300 characters; empty when the hit has no text).
        bm25_score: Score the BM25 index assigned to the hit.
    """

    # Where the hit lives.
    chunk_id: str
    doc_id: str
    page_number: int

    # What matched and how strongly.
    text_snippet: str
    bm25_score: float
|
|
|
|
|
|
@router.post("")
def search(
    req: SearchRequest,
    ctx: UserCtx = Depends(get_user_ctx),
) -> list[SearchResult]:
    """Run a BM25 keyword query against the caller's index.

    Args:
        req: Validated search parameters (query text, result cap and an
            optional list of document ids).
        ctx: Per-request user context resolved by ``get_user_ctx``; supplies
            the BM25 index and the database path used to refresh it.

    Returns:
        One ``SearchResult`` per hit, in the order the index returned them.
    """
    index = ctx.bm25
    # Let the index bring itself up to date with the database before querying.
    index.ensure_fresh(ctx.db_path)

    results: list[SearchResult] = []
    for hit in index.query(req.query, top_k=req.top_k, doc_ids=req.doc_ids):
        # A hit may carry no text; fall back to "" and cap the excerpt at
        # 300 characters.
        snippet = (hit.text or "")[:300]
        results.append(
            SearchResult(
                chunk_id=hit.chunk_id,
                doc_id=hit.doc_id,
                page_number=hit.page_number,
                text_snippet=snippet,
                bm25_score=hit.score,
            )
        )
    return results
|