Retrieval:
- Add _fetch_adjacent() to retriever: fetches page ± 1 chunks from DB
after ranking so mid-sentence EPUB chunk boundaries don't lose context
- Fix vec DB doc-filter: oversample to top_k*20 before Python filter
instead of post-filtering an already-small global pool (fixes wrong-book
results when searching within a single document)
- top_k default 5 → 10; context per chunk 500 → 1500 chars; citation
snippet 200 → 400 chars
Artifact cleaning:
- Add scripts/text_clean.py: strips ABC Amber LIT Converter watermarks,
processtext.com URLs, bare page numbers, piracy stamps from extracted text
- Wire clean_paragraph() into ingest_pdf.py and new ingest_epub.py
Startup validation:
- _check_vec_schema() at boot: detects embedding dimension mismatch,
deletes stale vec DB, and queues sequential re-embed in background thread
- Sequential _reembed_docs() prevents SQLite lock races on startup re-embed
cf-orch integration:
- Wire CF_ORCH_URL / CF_LICENSE_KEY into LLMRouter backend config so
allocate() fires and keeps the Ollama model warm between requests
Ingestion progress UI:
- GET /api/library/{doc_id}/status now returns vec_count from page_vecs_meta
- DocumentCard.vue polls status every 3 s while processing and shows
two-phase progress: indeterminate animation during extraction,
determinate "Embedding N/M pages" bar once vectors start landing
Other:
- Chat feedback endpoint + thumbs up/down UI (FeedbackButton.vue)
- EPUB ingest script (ingest_epub.py) with heading-based chunking
- migration 002: chat_feedback table
- README.md with setup and feature overview
162 lines
4.1 KiB
Python
162 lines
4.1 KiB
Python
# app/api/chat.py
|
|
"""
|
|
RAG chat endpoint — retrieves relevant page chunks and synthesizes an answer.
|
|
|
|
BSL 1.1 — BYOK gate: requires PAGEPIPER_OLLAMA_URL or a Paid tier license.
|
|
Returns 402 with clear upgrade message if neither is configured.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
from pydantic import BaseModel
|
|
|
|
from app.services.retriever import Retriever
|
|
from app.services.synthesizer import Synthesizer
|
|
|
|
logger = logging.getLogger(__name__)
|
|
router = APIRouter(prefix="/api/chat", tags=["chat"])
|
|
|
|
|
|
class ChatTurn(BaseModel):
    # One message of a conversation; ChatRequest.history is a list of these.

    # Who produced the message: "user" | "assistant"
    role: str

    # The message text.
    content: str
|
|
|
|
|
|
class ChatRequest(BaseModel):
    # Request body accepted by the chat endpoint.

    # The current user message; used as the retrieval query and passed to the synthesizer.
    message: str

    # Earlier conversation turns; dumped to plain dicts before synthesis.
    history: list[ChatTurn] = []

    # Forwarded to the retriever's hybrid_search as `doc_ids`; None by default.
    doc_ids: list[str] | None = None

    # Forwarded to the retriever's hybrid_search as `top_k`.
    top_k: int = 10
|
|
|
|
|
|
class ChatResponse(BaseModel):
    # Response body returned by the chat endpoint.

    # The synthesized answer, or a fixed "nothing found" message.
    answer: str

    # One dict per citation with keys doc_id, page_number, snippet, bm25_score.
    citations: list[dict]
|
|
|
|
|
|
class ChatFeedbackRequest(BaseModel):
    # Request body accepted by the chat feedback endpoint.

    # 1 = thumbs up, -1 = thumbs down
    rating: int

    # The question that was asked; optional, empty by default.
    question: str = ""

    # The answer being rated; optional, empty by default.
    answer: str = ""

    # Document ids associated with the rated exchange; stored as a JSON string.
    doc_ids: list[str] = []
|
|
|
|
|
|
def _get_llm_router():
    """Build an LLMRouter from the app's LLM config.

    Returns None when ``get_llm_config()`` yields no configuration.
    """
    from app.config import get_llm_config

    llm_config = get_llm_config()
    if llm_config is not None:
        # Only import the router package once we know a config exists.
        from circuitforge_core.llm import LLMRouter

        return LLMRouter(llm_config)
    return None
|
|
|
|
|
|
def _get_db_path() -> str:
|
|
"""Read lazily so test fixtures take effect."""
|
|
import pathlib
|
|
|
|
data_dir = pathlib.Path(os.environ.get("PAGEPIPER_DATA_DIR", "data"))
|
|
return str(data_dir / "pagepiper.db")
|
|
|
|
|
|
def _get_vec_db_path() -> str:
|
|
import pathlib
|
|
|
|
data_dir = pathlib.Path(os.environ.get("PAGEPIPER_DATA_DIR", "data"))
|
|
return str(data_dir / "pagepiper_vecs.db")
|
|
|
|
|
|
def _require_llm():
    """Return the LLMRouter, or fail the request with HTTP 402.

    Raises:
        HTTPException: status 402 with an ``ollama_required`` error payload
            when ``_get_llm_router()`` returns None.
    """
    llm_router = _get_llm_router()
    if llm_router is not None:
        return llm_router

    setup_hint = (
        "RAG chat requires Ollama. Set PAGEPIPER_OLLAMA_URL in your .env file, "
        "then restart. Run: ollama pull nomic-embed-text && ollama pull mistral:7b"
    )
    raise HTTPException(
        status_code=402,
        detail={"error": "ollama_required", "message": setup_hint},
    )
|
|
|
|
|
|
@router.post("")
def chat(req: ChatRequest) -> ChatResponse:
    """Answer a chat message from retrieved chunks.

    Runs a hybrid search for ``req.message``; if nothing is found, returns a
    fixed "no relevant passages" answer with no citations. Otherwise hands the
    message, history and chunks to the Synthesizer and returns its answer and
    citations.

    Raises:
        HTTPException: status 402 (via ``_require_llm``) when no LLM router
            is available.
    """
    llm = _require_llm()

    # NOTE(review): imported at call time rather than module top — presumably
    # to avoid a circular import with app.main; confirm.
    from app.main import _bm25

    chunks = Retriever(_bm25).hybrid_search(
        query=req.message,
        top_k=req.top_k,
        doc_ids=req.doc_ids,
        db_path=_get_db_path(),
        vec_db_path=_get_vec_db_path(),
        llm=llm,
    )

    if not chunks:
        nothing_found = (
            "I couldn't find any relevant passages. "
            "Try a different query or check which documents are indexed."
        )
        return ChatResponse(answer=nothing_found, citations=[])

    synthesizer = Synthesizer(llm)
    prior_turns = [turn.model_dump() for turn in req.history]
    result = synthesizer.synthesize(
        message=req.message,
        history=prior_turns,
        chunks=chunks,
    )

    # Flatten citation objects into plain dicts for the response body.
    citation_dicts = []
    for cite in result.citations:
        citation_dicts.append(
            {
                "doc_id": cite.doc_id,
                "page_number": cite.page_number,
                "snippet": cite.snippet,
                "bm25_score": cite.bm25_score,
            }
        )

    return ChatResponse(answer=result.answer, citations=citation_dicts)
|
|
|
|
|
|
@router.get("/feedback/status")
def chat_feedback_status() -> dict:
    """Report whether PAGEPIPER_CHAT_FEEDBACK is set to a truthy value.

    The variable counts as enabled when, case-insensitively, it equals
    ``1``, ``true`` or ``yes``; unset or anything else means disabled.
    """
    flag = os.getenv("PAGEPIPER_CHAT_FEEDBACK", "").lower()
    return {"enabled": flag in ("1", "true", "yes")}
|
|
|
|
|
|
@router.post("/feedback")
def submit_chat_feedback(req: ChatFeedbackRequest) -> dict:
    """Store one thumbs-up / thumbs-down rating in the ``chat_feedback`` table.

    The question is truncated to 2000 characters and the answer to 4000
    before insertion; ``doc_ids`` is stored as a JSON-encoded list.

    Returns:
        ``{"ok": True}`` once the row has been committed.

    Raises:
        HTTPException: status 422 when ``req.rating`` is neither 1 nor -1.
    """
    import json
    import sqlite3

    if req.rating not in (1, -1):
        # HTTPException is already imported at module scope; no local re-import needed.
        raise HTTPException(status_code=422, detail="rating must be 1 or -1")

    con = sqlite3.connect(_get_db_path())
    try:
        # The connection context manager commits on success and rolls back if
        # the INSERT raises. It does NOT close the connection, hence the finally.
        with con:
            con.execute(
                "INSERT INTO chat_feedback (rating, question, answer, doc_ids) VALUES (?, ?, ?, ?)",
                (req.rating, req.question[:2000], req.answer[:4000], json.dumps(req.doc_ids)),
            )
    finally:
        con.close()
    return {"ok": True}
|