# app/api/search.py
"""
BM25 keyword search across the document library.

MIT — no tier gate. No Ollama required.
"""
from __future__ import annotations

import logging
import os
from typing import Annotated

from fastapi import APIRouter, Depends
from pydantic import BaseModel, Field

from app.services.bm25_index import BM25Index

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/search", tags=["search"])


class SearchRequest(BaseModel):
    """Request body accepted by the search endpoint."""

    # Text passed to the BM25 index as the query.
    query: str
    # Maximum number of hits requested; validated to 1..50, defaults to 10.
    top_k: int = Field(default=10, ge=1, le=50)
    # Optional document-id filter forwarded to the index as-is.
    # NOTE(review): None presumably means "no filtering" — confirm in
    # BM25Index.query.
    doc_ids: list[str] | None = None


class SearchResult(BaseModel):
    """One hit returned by the search endpoint."""

    chunk_id: str
    doc_id: str
    page_number: int
    # At most the first 300 characters of the hit's text.
    text_snippet: str
    bm25_score: float


def _get_bm25() -> BM25Index:
    """Dependency provider returning the ``_bm25`` object held by ``app.main``."""
    # Imported at call time rather than at module import.
    # NOTE(review): presumably avoids a circular import with app.main — confirm.
    from app.main import _bm25 as shared_index

    return shared_index


def _get_db_path() -> str:
    """Return the path of ``pagepiper.db`` inside the data directory.

    The directory comes from the ``PAGEPIPER_DATA_DIR`` environment variable
    and falls back to ``data``. The variable is read on every call, so
    overrides applied after import (for example by test fixtures) are honoured.
    """
    from pathlib import Path

    data_dir = Path(os.getenv("PAGEPIPER_DATA_DIR", "data"))
    return str(data_dir.joinpath("pagepiper.db"))


@router.post("")
def search(
    req: SearchRequest,
    bm25: Annotated[BM25Index, Depends(_get_bm25)],
) -> list[SearchResult]:
    """Run a BM25 query and return the hits as ``SearchResult`` objects.

    Args:
        req: Validated request carrying the query, ``top_k`` and the optional
            ``doc_ids`` filter.
        bm25: Index instance injected through ``_get_bm25``.

    Returns:
        One ``SearchResult`` per hit, in the order the index returned them.
    """
    # Let the index check itself against the current database before querying.
    db_path = _get_db_path()
    bm25.ensure_fresh(db_path)

    matches = bm25.query(req.query, top_k=req.top_k, doc_ids=req.doc_ids)

    results: list[SearchResult] = []
    for match in matches:
        results.append(
            SearchResult(
                chunk_id=match.chunk_id,
                doc_id=match.doc_id,
                page_number=match.page_number,
                text_snippet=match.text[:300],
                bm25_score=match.score,
            )
        )
    return results