# pagepiper/app/api/search.py

# app/api/search.py
"""
BM25 keyword search across the document library.

MIT — no tier gate. No Ollama required.
"""
from __future__ import annotations

import logging

from fastapi import APIRouter, Depends
from pydantic import BaseModel, Field

from app.deps import UserCtx, get_user_ctx

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/search", tags=["search"])


class SearchRequest(BaseModel):
    """Request body accepted by the ``POST /api/search`` endpoint."""

    # Query text; handed unchanged to the BM25 index by the endpoint.
    query: str
    # Maximum number of hits requested. Validated to the inclusive range
    # 1..50; defaults to 10 when omitted.
    top_k: int = Field(default=10, ge=1, le=50)
    # Optional list of document IDs forwarded to the index query (None when
    # omitted).
    # NOTE(review): presumably restricts the search to these documents and
    # None means "whole library" — confirm against the BM25 index code.
    doc_ids: list[str] | None = None


class SearchResult(BaseModel):
    """One hit returned by the search endpoint, built from an index hit."""

    # Identifier of the matched chunk, copied from the index hit.
    chunk_id: str
    # Identifier of the document the hit belongs to.
    doc_id: str
    # Page number reported by the index hit.
    # NOTE(review): 0- vs 1-based numbering is not visible here — confirm.
    page_number: int
    # Leading portion of the hit's text; the endpoint truncates it to at most
    # 300 characters and substitutes "" when the hit has no text.
    text_snippet: str
    # Score reported by the index for this hit.
    bm25_score: float


@router.post("")
def search(
    req: SearchRequest,
    ctx: UserCtx = Depends(get_user_ctx),
) -> list[SearchResult]:
    """Run a keyword query against the caller's BM25 index.

    The index's ``ensure_fresh`` is invoked with the user's database path
    before querying (presumably so a stale index is rebuilt first — see the
    index implementation for the exact semantics).

    Args:
        req: Validated request body carrying the query text, the hit limit
            and the optional document-ID list.
        ctx: Per-user context injected by FastAPI; supplies the BM25 index
            (``ctx.bm25``) and the database path (``ctx.db_path``).

    Returns:
        One ``SearchResult`` per hit, in the order the index returned them.
        Each snippet is at most the first 300 characters of the hit's text.
    """
    ctx.bm25.ensure_fresh(ctx.db_path)
    matches = ctx.bm25.query(
        req.query,
        top_k=req.top_k,
        doc_ids=req.doc_ids,
    )

    results = []
    for match in matches:
        results.append(
            SearchResult(
                chunk_id=match.chunk_id,
                doc_id=match.doc_id,
                page_number=match.page_number,
                # A hit without text yields an empty snippet, not an error.
                text_snippet=(match.text or "")[:300],
                bm25_score=match.score,
            )
        )
    return results