pagepiper/app/api/chat.py
pyr0ball 0e493ab560 feat(api): add retriever, synthesizer, and chat endpoint (BSL — BYOK gate)
- app/services/retriever.py: hybrid BM25 + semantic Retriever with BM25-only fallback when llm=None
- app/services/synthesizer.py: LLM answer synthesis with citation assembly over retrieved chunks
- app/api/chat.py: POST /api/chat endpoint with 402 gate when PAGEPIPER_OLLAMA_URL is unset
- tests/test_synthesizer.py: 3 TDD unit tests (mocked LLM, context building, system prompt)
- tests/test_chat_api.py: 2 integration tests (402 without Ollama, 200 with mocked retriever+LLM)
2026-05-04 17:47:10 -07:00

126 lines
3 KiB
Python

# app/api/chat.py
"""
RAG chat endpoint — retrieves relevant page chunks and synthesizes an answer.
BSL 1.1 — BYOK gate: requires PAGEPIPER_OLLAMA_URL or a Paid tier license.
Returns 402 with clear upgrade message if neither is configured.
"""
from __future__ import annotations
import logging
import os
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from app.services.retriever import Retriever
from app.services.synthesizer import Synthesizer
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router for the chat API; every route declared on it is served under /api/chat.
router = APIRouter(prefix="/api/chat", tags=["chat"])
class ChatTurn(BaseModel):
    """One prior message in the conversation, as sent by the client."""

    # Speaker of this turn. Expected values are "user" | "assistant";
    # the model itself accepts any string.
    role: str
    # Text of the message for this turn.
    content: str
class ChatRequest(BaseModel):
    """Request body for POST /api/chat."""

    # The new user message; used as the retrieval query and handed to the
    # synthesizer as the message to answer.
    message: str
    # Earlier turns of the conversation, forwarded to the synthesizer.
    history: list[ChatTurn] = []
    # Forwarded to the retriever as ``doc_ids``; None presumably means
    # "no document filter" — confirm against Retriever.hybrid_search.
    doc_ids: list[str] | None = None
    # Forwarded to the retriever as ``top_k``.
    top_k: int = 5
class ChatResponse(BaseModel):
    """Response body for POST /api/chat."""

    # Answer text: either the synthesized answer or the fixed
    # "no relevant passages" message.
    answer: str
    # One dict per citation, with keys "doc_id", "page_number" and "snippet";
    # empty when nothing was retrieved.
    citations: list[dict]
def _get_llm_router():
    """Build an LLMRouter from the app's LLM configuration.

    Returns:
        An ``LLMRouter`` wrapping the config returned by ``get_llm_config()``,
        or ``None`` when that function returns ``None`` (i.e. no LLM backend
        such as Ollama is configured).
    """
    # Imports are kept function-local, as in the rest of this module's helpers.
    from app.config import get_llm_config

    llm_config = get_llm_config()
    if llm_config is not None:
        # Only pull in the router implementation when there is a config to use.
        from circuitforge_core.llm import LLMRouter

        return LLMRouter(llm_config)
    return None
def _get_db_path() -> str:
    """Return the path of ``pagepiper.db`` inside the data directory.

    The data directory comes from the ``PAGEPIPER_DATA_DIR`` environment
    variable (default ``"data"``). It is looked up on every call rather than
    at import time so that test fixtures which change the variable take effect.
    """
    from pathlib import Path

    root = Path(os.getenv("PAGEPIPER_DATA_DIR", "data"))
    return str(root.joinpath("pagepiper.db"))
def _get_vec_db_path() -> str:
    """Return the path of ``pagepiper_vecs.db`` inside the data directory.

    The data directory comes from the ``PAGEPIPER_DATA_DIR`` environment
    variable (default ``"data"``) and is looked up on every call.
    """
    from pathlib import Path

    root = Path(os.getenv("PAGEPIPER_DATA_DIR", "data"))
    return str(root.joinpath("pagepiper_vecs.db"))
def _require_llm():
    """Return the configured LLMRouter, or reject the request with HTTP 402.

    Returns:
        The router produced by ``_get_llm_router()`` when one is available.

    Raises:
        HTTPException: status 402 with an ``ollama_required`` error payload
            (including setup instructions) when no router is available.
    """
    available = _get_llm_router()
    if available is not None:
        return available

    # No LLM backend: tell the caller how to enable the feature.
    setup_hint = (
        "RAG chat requires Ollama. Set PAGEPIPER_OLLAMA_URL in your .env file, "
        "then restart. Run: ollama pull nomic-embed-text && ollama pull mistral:7b"
    )
    raise HTTPException(
        status_code=402,
        detail={"error": "ollama_required", "message": setup_hint},
    )
@router.post("")
def chat(req: ChatRequest) -> ChatResponse:
    """Answer a chat message from retrieved document chunks.

    Steps: require an LLM (402 otherwise), run a hybrid search for the
    message, and — if anything was found — have the synthesizer produce an
    answer with citations.

    Args:
        req: The chat request (message, prior history, optional document
            filter and ``top_k``).

    Returns:
        A ``ChatResponse`` with the synthesized answer and its citations, or
        a fixed "no relevant passages" answer with no citations when the
        search returns nothing.

    Raises:
        HTTPException: 402 from ``_require_llm()`` when no LLM is configured.
    """
    llm = _require_llm()

    # NOTE(review): imported inside the handler rather than at module top —
    # presumably to avoid a circular import with app.main; confirm.
    from app.main import _bm25

    hits = Retriever(_bm25).hybrid_search(
        query=req.message,
        top_k=req.top_k,
        doc_ids=req.doc_ids,
        db_path=_get_db_path(),
        vec_db_path=_get_vec_db_path(),
        llm=llm,
    )

    # Nothing retrieved: answer directly without calling the synthesizer.
    if not hits:
        nothing_found = (
            "I couldn't find any relevant passages. "
            "Try a different query or check which documents are indexed."
        )
        return ChatResponse(answer=nothing_found, citations=[])

    outcome = Synthesizer(llm).synthesize(
        message=req.message,
        # The synthesizer receives history as plain dicts, not ChatTurn models.
        history=[turn.model_dump() for turn in req.history],
        chunks=hits,
    )

    # Flatten citation objects into plain dicts for the response payload.
    citation_rows = [
        {
            "doc_id": cite.doc_id,
            "page_number": cite.page_number,
            "snippet": cite.snippet,
        }
        for cite in outcome.citations
    ]
    return ChatResponse(answer=outcome.answer, citations=citation_rows)