# app/api/chat.py
"""
RAG chat endpoint — retrieves relevant page chunks and synthesizes an answer.

BSL 1.1 — BYOK gate: requires PAGEPIPER_OLLAMA_URL or a Paid tier license.
Returns 402 with clear upgrade message if neither is configured.
"""
from __future__ import annotations

import logging
import os
from pathlib import Path

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from app.services.retriever import Retriever
from app.services.synthesizer import Synthesizer

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/chat", tags=["chat"])


# NOTE: the request/response models are documented with ``#`` comments rather
# than docstrings on purpose — pydantic publishes a model docstring as the
# schema description, which would change the generated OpenAPI document.

# One earlier message of the conversation, as supplied by the client.
class ChatTurn(BaseModel):
    role: str  # "user" | "assistant"
    content: str


# Request body accepted by the chat endpoint.
class ChatRequest(BaseModel):
    message: str  # passed to the retriever as the query and to the synthesizer
    # A literal list default is safe here: pydantic copies mutable defaults
    # for every model instance, so requests never share the same list.
    history: list[ChatTurn] = []
    doc_ids: list[str] | None = None  # forwarded unchanged to hybrid_search
    top_k: int = 5  # forwarded unchanged to hybrid_search


# Response body: the synthesized answer plus one dict per citation.
class ChatResponse(BaseModel):
    answer: str
    citations: list[dict]


def _get_llm_router():
    """Return an ``LLMRouter`` built from the app's LLM config, or ``None``.

    ``None`` is returned when ``get_llm_config()`` yields no configuration.
    Both imports are deferred to call time, so ``circuitforge_core`` is only
    imported once a configuration actually exists.
    """
    from app.config import get_llm_config

    cfg = get_llm_config()
    if cfg is None:
        return None

    from circuitforge_core.llm import LLMRouter

    return LLMRouter(cfg)


def _data_dir() -> Path:
    """Return the data directory taken from ``PAGEPIPER_DATA_DIR``.

    Falls back to the relative path ``data`` when the variable is unset. The
    environment is read on every call (never cached) so that test fixtures
    which change the variable take effect.
    """
    return Path(os.environ.get("PAGEPIPER_DATA_DIR", "data"))


def _get_db_path() -> str:
    """Return the path of ``pagepiper.db`` inside the data directory.

    Read lazily so test fixtures take effect.
    """
    return str(_data_dir() / "pagepiper.db")


def _get_vec_db_path() -> str:
    """Return the path of ``pagepiper_vecs.db`` inside the data directory.

    Read lazily so test fixtures take effect.
    """
    return str(_data_dir() / "pagepiper_vecs.db")


def _require_llm():
    """Return the configured ``LLMRouter`` or raise HTTP 402.

    Raises:
        HTTPException: status 402 with an ``ollama_required`` detail payload
            when ``_get_llm_router()`` returns ``None``.
    """
    llm = _get_llm_router()
    if llm is None:
        raise HTTPException(
            status_code=402,
            detail={
                "error": "ollama_required",
                # The message deliberately contains a line break before the
                # "Run:" hint; keep the text exactly as is.
                "message": (
                    "RAG chat requires Ollama. Set PAGEPIPER_OLLAMA_URL in your .env file, "
                    "then restart. \n"
                    "Run: ollama pull nomic-embed-text && ollama pull mistral:7b"
                ),
            },
        )
    return llm


@router.post("")
def chat(req: ChatRequest) -> ChatResponse:
    # Answer one chat message: gate on the LLM, retrieve chunks, synthesize.
    # (Kept as comments, not a docstring: FastAPI would publish a docstring
    # as the operation description in the OpenAPI document.)

    # Gate first: raises HTTP 402 before any retrieval work is done.
    llm = _require_llm()

    # NOTE(review): imported inside the handler, presumably to avoid a
    # circular import with app.main — confirm before hoisting it.
    from app.main import _bm25

    retriever = Retriever(_bm25)
    chunks = retriever.hybrid_search(
        query=req.message,
        top_k=req.top_k,
        doc_ids=req.doc_ids,
        db_path=_get_db_path(),
        vec_db_path=_get_vec_db_path(),
        llm=llm,
    )

    # Nothing retrieved: reply with a fixed hint and skip the synthesizer.
    if not chunks:
        return ChatResponse(
            answer=(
                "I couldn't find any relevant passages. "
                "Try a different query or check which documents are indexed."
            ),
            citations=[],
        )

    synth = Synthesizer(llm)
    result = synth.synthesize(
        message=req.message,
        # The synthesizer receives plain dicts rather than ChatTurn models.
        history=[t.model_dump() for t in req.history],
        chunks=chunks,
    )

    # Flatten each citation object into a plain dict for the response model.
    return ChatResponse(
        answer=result.answer,
        citations=[
            {
                "doc_id": c.doc_id,
                "page_number": c.page_number,
                "snippet": c.snippet,
                "bm25_score": c.bm25_score,
            }
            for c in result.citations
        ],
    )