# pagepiper/app/api/chat.py

# app/api/chat.py
"""
RAG chat endpoint — retrieves relevant page chunks and synthesizes an answer.

BSL 1.1 — BYOK gate: requires PAGEPIPER_OLLAMA_URL or a Paid tier license.
Returns 402 with clear upgrade message if neither is configured.
"""
from __future__ import annotations

import logging
import os

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from app.services.retriever import Retriever
from app.services.synthesizer import Synthesizer

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/chat", tags=["chat"])


class ChatTurn(BaseModel):
    """One earlier message of the conversation, sent by the client as history."""

    # Speaker of the turn. Expected to be "user" or "assistant", but the field
    # is a plain str, so this model does not restrict the value.
    role: str  # "user" | "assistant"
    # Text of the message.
    content: str


class ChatRequest(BaseModel):
    """Request body accepted by the chat endpoint in this module."""

    # The question to answer; used as the retrieval query and passed to the
    # synthesizer.
    message: str
    # Prior turns of the conversation; empty when the client sends none.
    history: list[ChatTurn] = []
    # Forwarded to the retriever's `doc_ids` argument.
    # NOTE(review): None presumably means "search every document" — confirm
    # against Retriever.hybrid_search.
    doc_ids: list[str] | None = None
    # Forwarded to the retriever's `top_k` argument. No bounds are enforced here.
    top_k: int = 5


class ChatResponse(BaseModel):
    """Response body returned by the chat endpoint in this module."""

    # Answer text: either the synthesizer's output or the fixed
    # "no relevant passages" message.
    answer: str
    # One dict per citation. The chat endpoint fills each with the keys
    # "doc_id", "page_number", "snippet" and "bm25_score"; the list is empty
    # when no passages were retrieved.
    citations: list[dict]


def _get_llm_router():
    """Build an ``LLMRouter`` from the app's LLM config, if one exists.

    Both imports are deferred to call time, and ``circuitforge_core`` is only
    imported once a config has actually been found.

    Returns:
        ``LLMRouter(cfg)`` when ``get_llm_config()`` returns a config,
        otherwise ``None`` (i.e. no LLM backend such as Ollama is configured).
    """
    from app.config import get_llm_config

    llm_config = get_llm_config()
    if llm_config is not None:
        from circuitforge_core.llm import LLMRouter

        return LLMRouter(llm_config)
    return None


def _get_db_path() -> str:
    """Return the path of ``pagepiper.db`` inside the data directory.

    The directory comes from the ``PAGEPIPER_DATA_DIR`` environment variable
    and falls back to ``"data"``. It is looked up on every call, not cached at
    import time, so test fixtures that change the variable take effect.
    """
    from pathlib import Path

    base_dir = os.getenv("PAGEPIPER_DATA_DIR", "data")
    return str(Path(base_dir, "pagepiper.db"))


def _get_vec_db_path() -> str:
    """Return the path of ``pagepiper_vecs.db`` inside the data directory.

    The directory comes from the ``PAGEPIPER_DATA_DIR`` environment variable
    and falls back to ``"data"``. It is looked up on every call.
    """
    from pathlib import Path

    base_dir = os.getenv("PAGEPIPER_DATA_DIR", "data")
    return str(Path(base_dir, "pagepiper_vecs.db"))


def _require_llm():
    """Return the configured LLM router, or reject the request.

    Returns:
        Whatever ``_get_llm_router()`` produced, when it is not ``None``.

    Raises:
        HTTPException: status 402 with an ``ollama_required`` error payload
            telling the user how to configure Ollama, when no router is
            available.
    """
    router_or_none = _get_llm_router()
    if router_or_none is not None:
        return router_or_none

    setup_hint = (
        "RAG chat requires Ollama. Set PAGEPIPER_OLLAMA_URL in your .env file, "
        "then restart. Run: ollama pull nomic-embed-text && ollama pull mistral:7b"
    )
    raise HTTPException(
        status_code=402,
        detail={"error": "ollama_required", "message": setup_hint},
    )


@router.post("")
def chat(req: ChatRequest) -> ChatResponse:
    """Answer ``req.message`` using passages retrieved by hybrid search.

    Steps:
      1. Obtain the LLM router, or fail with 402 via ``_require_llm``.
      2. Run ``Retriever.hybrid_search`` with the request's query, ``top_k``
         and ``doc_ids`` against the main and vector database paths.
      3. If nothing was retrieved, reply with a fixed "no relevant passages"
         message and no citations.
      4. Otherwise let ``Synthesizer`` produce the answer from the message,
         the dumped history and the retrieved chunks, and flatten its
         citations into plain dicts.

    Raises:
        HTTPException: 402 when no LLM router is configured.
    """
    llm = _require_llm()

    # Imported per request rather than at module load.
    # NOTE(review): presumably avoids a circular import with app.main — confirm.
    from app.main import _bm25

    searcher = Retriever(_bm25)
    passages = searcher.hybrid_search(
        query=req.message,
        top_k=req.top_k,
        doc_ids=req.doc_ids,
        db_path=_get_db_path(),
        vec_db_path=_get_vec_db_path(),
        llm=llm,
    )

    if passages:
        synthesis = Synthesizer(llm).synthesize(
            message=req.message,
            history=[turn.model_dump() for turn in req.history],
            chunks=passages,
        )
        answer_text = synthesis.answer

        cited = []
        for cite in synthesis.citations:
            cited.append(
                {
                    "doc_id": cite.doc_id,
                    "page_number": cite.page_number,
                    "snippet": cite.snippet,
                    "bm25_score": cite.bm25_score,
                }
            )
        return ChatResponse(answer=answer_text, citations=cited)

    # Nothing retrieved: skip synthesis entirely.
    no_hits_message = (
        "I couldn't find any relevant passages. "
        "Try a different query or check which documents are indexed."
    )
    return ChatResponse(answer=no_hits_message, citations=[])