Compare commits

..

No commits in common. "2e24808d913853398ab83b570246ec6716aab719" and "abeb6089e56e3fa70698f0613e41847ac3b94219" have entirely different histories.

44 changed files with 2 additions and 5457 deletions

View file

@ -1,22 +0,0 @@
# pagepiper cloud environment — copy to .env and fill in secrets
# Used by: docker compose -f compose.cloud.yml -p pagepiper-cloud ...
# Data directories (host paths, bind-mounted into the api container)
PAGEPIPER_DATA_DIR=/devl/pagepiper-cloud-data
PAGEPIPER_BOOKS_DIR=/devl/pagepiper-cloud-data/books
# BYOK gate — set to enable hybrid search and RAG chat (BSL feature)
# Leave blank to run BM25-only mode (MIT, no Ollama required)
PAGEPIPER_OLLAMA_URL=
# Embedding and chat model selection (only used when PAGEPIPER_OLLAMA_URL is set)
PAGEPIPER_EMBED_MODEL=nomic-embed-text
PAGEPIPER_CHAT_MODEL=mistral:7b
# Heimdall license server (optional — for per-user tier validation)
HEIMDALL_URL=https://license.circuitforge.tech
HEIMDALL_ADMIN_TOKEN=
# cf-orch streaming proxy — coordinator product key
# Must match COORDINATOR_PRODUCT_KEYS["pagepiper"] in cf-orch.env on the coordinator
COORDINATOR_PAGEPIPER_KEY=

View file

@ -1,127 +0,0 @@
# app/api/chat.py
"""
RAG chat endpoint — retrieves relevant page chunks and synthesizes an answer.
BSL 1.1 BYOK gate: requires PAGEPIPER_OLLAMA_URL or a Paid tier license.
Returns 402 with clear upgrade message if neither is configured.
"""
from __future__ import annotations
import logging
import os
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from app.services.retriever import Retriever
from app.services.synthesizer import Synthesizer
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/chat", tags=["chat"])
class ChatTurn(BaseModel):
    """A single earlier message in the conversation."""

    # Who spoke: "user" or "assistant".
    role: str
    # The text of that message.
    content: str
class ChatRequest(BaseModel):
    """Request body accepted by the chat endpoint."""

    # The new user message; the endpoint also uses it as the retrieval query.
    message: str
    # Earlier turns of the conversation (none by default).
    history: list[ChatTurn] = []
    # Optional list of document ids forwarded to the retriever.
    doc_ids: list[str] | None = None
    # How many chunks to ask the retriever for.
    top_k: int = 5
class ChatResponse(BaseModel):
    """Response body returned by the chat endpoint."""

    # The synthesized answer (or a fixed "nothing found" message).
    answer: str
    # One dict per cited chunk: doc_id, page_number, snippet, bm25_score.
    citations: list[dict]
def _get_llm_router():
    """Build an ``LLMRouter`` from the app's LLM config; return None when there is no config."""
    # Both imports stay function-local, exactly as before.
    from app.config import get_llm_config

    llm_config = get_llm_config()
    if llm_config is not None:
        from circuitforge_core.llm import LLMRouter

        return LLMRouter(llm_config)
    return None
def _get_db_path() -> str:
"""Read lazily so test fixtures take effect."""
import pathlib
data_dir = pathlib.Path(os.environ.get("PAGEPIPER_DATA_DIR", "data"))
return str(data_dir / "pagepiper.db")
def _get_vec_db_path() -> str:
import pathlib
data_dir = pathlib.Path(os.environ.get("PAGEPIPER_DATA_DIR", "data"))
return str(data_dir / "pagepiper_vecs.db")
def _require_llm():
    """Return the configured LLM router, or raise HTTP 402 when none is configured."""
    configured = _get_llm_router()
    if configured is not None:
        return configured

    # Structured detail so clients can branch on "error" and display "message".
    payload = {
        "error": "ollama_required",
        "message": (
            "RAG chat requires Ollama. Set PAGEPIPER_OLLAMA_URL in your .env file, "
            "then restart. Run: ollama pull nomic-embed-text && ollama pull mistral:7b"
        ),
    }
    raise HTTPException(status_code=402, detail=payload)
@router.post("")
def chat(req: ChatRequest) -> ChatResponse:
    """Answer a chat message from retrieved page chunks.

    Raises HTTP 402 (via ``_require_llm``) when no LLM is configured. When the
    retriever returns nothing, a fixed "nothing found" answer with no citations
    is returned and the synthesizer is not called.
    """
    llm = _require_llm()

    # Imported here rather than at module top, as in the original.
    from app.main import _bm25

    found = Retriever(_bm25).hybrid_search(
        query=req.message,
        top_k=req.top_k,
        doc_ids=req.doc_ids,
        db_path=_get_db_path(),
        vec_db_path=_get_vec_db_path(),
        llm=llm,
    )

    if found:
        turns = [turn.model_dump() for turn in req.history]
        outcome = Synthesizer(llm).synthesize(
            message=req.message,
            history=turns,
            chunks=found,
        )
        cited = []
        for cite in outcome.citations:
            cited.append(
                {
                    "doc_id": cite.doc_id,
                    "page_number": cite.page_number,
                    "snippet": cite.snippet,
                    "bm25_score": cite.bm25_score,
                }
            )
        return ChatResponse(answer=outcome.answer, citations=cited)

    nothing_found = (
        "I couldn't find any relevant passages. "
        "Try a different query or check which documents are indexed."
    )
    return ChatResponse(answer=nothing_found, citations=[])

View file

@ -1,24 +0,0 @@
# app/api/ingest.py
"""Ingest job status polling (proxies cf-orch or checks in-memory registry)."""
from __future__ import annotations
from fastapi import APIRouter, HTTPException
router = APIRouter(prefix="/api/ingest", tags=["ingest"])
# Populated by _run_ingest_background when cf-orch is unavailable
_task_registry: dict[str, dict] = {}
@router.get("/{task_id}")
def get_task_status(task_id: str) -> dict:
    """Return the status record for an ingest task.

    The in-memory registry (filled by the BackgroundTasks fallback) is checked
    first; otherwise cf-orch is asked. Any failure on the cf-orch path,
    including the import itself, is reported as HTTP 404.
    """
    local_entry = _task_registry.get(task_id)
    if local_entry is not None:
        return local_entry

    try:
        from circuitforge_core.tasks import get_task_status as orch_status  # type: ignore[import]

        return orch_status(task_id)
    except Exception:
        raise HTTPException(status_code=404, detail="Task not found")

View file

@ -1,179 +0,0 @@
# app/api/library.py
"""
Document library management API.
All endpoints in this module are MIT — no tier gate.
"""
from __future__ import annotations
import logging
import sqlite3
import uuid
from pathlib import Path
from typing import Callable
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from app.config import WATCH_DIR, DB_PATH, VEC_DB_PATH
from app.deps import get_db
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/library", tags=["library"])
# Injected by main.py after _bm25 is created
_mark_bm25_dirty: Callable[[], None] | None = None
def _dispatch_ingest(
    doc_id: str,
    file_path: str,
    background_tasks: BackgroundTasks,
) -> str:
    """Queue an ingest job for one document and return its task id.

    cf-orch is tried first. If that raises for any reason, the ingest is
    scheduled on ``background_tasks`` instead, under a locally generated UUID.
    """
    import os as _os
    from pathlib import Path as _Path

    # Resolved per call so tests using monkeypatch.setenv see their value.
    data_root = _Path(_os.environ.get("PAGEPIPER_DATA_DIR", "data"))
    ingest_args = {
        "doc_id": doc_id,
        "file_path": file_path,
        "db_path": str(data_root / "pagepiper.db"),
        "vec_db_path": str(data_root / "pagepiper_vecs.db"),
    }

    # Provisional id; replaced by the cf-orch id when dispatch succeeds.
    task_id = str(uuid.uuid4())
    try:
        from circuitforge_core.tasks import dispatch_task  # type: ignore[import]

        task_id = dispatch_task(caller="pagepiper/ingest_pdf", args=ingest_args)
        logger.info("Dispatched cf-orch ingest task %s for doc %s", task_id, doc_id)
    except Exception:
        from scripts.ingest_pdf import run as run_ingest

        background_tasks.add_task(_run_ingest_background, run_ingest, ingest_args, task_id)
        logger.info(
            "cf-orch unavailable — running ingest in background thread (task %s)", task_id
        )
    return task_id
def _run_ingest_background(run_fn: Callable[..., None], args: dict, task_id: str) -> None:
    """Run ``run_fn(**args)`` and mirror its progress into the in-memory task registry.

    On success the BM25 index is flagged dirty (when the callback is wired).
    Failures are logged and recorded in the registry; they are not re-raised.
    """
    from app.api.ingest import _task_registry as registry

    registry[task_id] = {"status": "running", "progress": 0}
    try:
        run_fn(**args)
        registry[task_id] = {"status": "complete", "progress": 100}
        if _mark_bm25_dirty:
            _mark_bm25_dirty()
    except Exception as exc:
        logger.exception("Ingest task %s failed", task_id)
        registry[task_id] = {"status": "error", "error": str(exc)}
@router.get("")
def list_library(db: sqlite3.Connection = Depends(get_db)) -> list[dict]:
    """Return every document row, newest first, as plain dicts."""
    cursor = db.execute(
        "SELECT id, title, file_path, status, task_id, page_count, created_at"
        " FROM documents ORDER BY created_at DESC"
    )
    return [dict(record) for record in cursor.fetchall()]
@router.post("/scan", status_code=202)
def scan_library(
    background_tasks: BackgroundTasks,
    db: sqlite3.Connection = Depends(get_db),
) -> dict:
    """Scan the watched directory and queue ingest for any new PDFs.

    A PDF is skipped when its document row is already ``ready`` (indexed) or
    ``processing`` (an ingest is in flight). Rows in any other state, and files
    with no row yet, are dispatched.

    Returns:
        ``{"discovered": <pdf count>, "queued": <dispatched count>, "tasks": [...]}``.

    Raises:
        HTTPException: 404 when the watch directory does not exist.
    """
    watch = WATCH_DIR
    if not watch.exists():
        raise HTTPException(status_code=404, detail=f"Watch directory not found: {watch}")

    # Re-dispatching a document that is still being ingested would start a
    # second job for the same doc_id while the first is running; use the
    # reingest endpoint to force a retry of a stuck document.
    skip_statuses = ("ready", "processing")

    pdfs = list(watch.glob("**/*.pdf"))
    queued = []
    for pdf_path in pdfs:
        path_str = str(pdf_path.resolve())
        existing = db.execute(
            "SELECT id, status FROM documents WHERE file_path = ?", [path_str]
        ).fetchone()
        if existing and existing["status"] in skip_statuses:
            continue
        if existing:
            doc_id = existing["id"]
        else:
            # Derive a readable title from the file name.
            title = pdf_path.stem.replace("_", " ").replace("-", " ").title()
            doc_id = db.execute(
                "INSERT INTO documents(title, file_path, status) VALUES (?,?,?) RETURNING id",
                [title, path_str, "pending"],
            ).fetchone()[0]
            db.commit()
        task_id = _dispatch_ingest(doc_id, path_str, background_tasks)
        db.execute(
            "UPDATE documents SET status='processing', task_id=? WHERE id=?",
            [task_id, doc_id],
        )
        db.commit()
        queued.append({"doc_id": doc_id, "task_id": task_id})
    return {"discovered": len(pdfs), "queued": len(queued), "tasks": queued}
@router.post("/{doc_id}/reingest", status_code=202)
def reingest_document(
    doc_id: str,
    background_tasks: BackgroundTasks,
    db: sqlite3.Connection = Depends(get_db),
) -> dict:
    """Dispatch a fresh ingest for an existing document.

    Marks the row ``processing``, stores the new task id and clears any
    previous error message. Raises HTTP 404 for an unknown ``doc_id``.
    """
    found = db.execute("SELECT file_path FROM documents WHERE id=?", [doc_id]).fetchone()
    if found is None:
        raise HTTPException(status_code=404, detail="Document not found")

    new_task = _dispatch_ingest(doc_id, found["file_path"], background_tasks)
    db.execute(
        "UPDATE documents SET status='processing', task_id=?, error_msg=NULL WHERE id=?",
        [new_task, doc_id],
    )
    db.commit()
    return {"doc_id": doc_id, "task_id": new_task}
@router.delete("/{doc_id}", status_code=204)
def delete_document(
    doc_id: str,
    db: sqlite3.Connection = Depends(get_db),
) -> None:
    """Delete a document row and, best effort, its vectors; then flag BM25 dirty.

    Raises HTTP 404 for an unknown ``doc_id``. A failure while removing
    vectors is logged as a warning and does not fail the request.
    """
    exists = db.execute("SELECT id FROM documents WHERE id=?", [doc_id]).fetchone()
    if exists is None:
        raise HTTPException(status_code=404, detail="Document not found")

    db.execute("DELETE FROM documents WHERE id=?", [doc_id])
    db.commit()

    # Vector cleanup is deliberately non-fatal.
    try:
        from circuitforge_core.vector.sqlite_vec import LocalSQLiteVecStore  # type: ignore[import]

        vec_store = LocalSQLiteVecStore(db_path=VEC_DB_PATH, table="page_vecs", dimensions=768)
        vec_store.delete_where({"doc_id": doc_id})
    except Exception as exc:
        logger.warning("Could not remove vectors for doc %s: %s", doc_id, exc)

    if _mark_bm25_dirty:
        _mark_bm25_dirty()
@router.get("/{doc_id}/status")
def document_status(
    doc_id: str,
    db: sqlite3.Connection = Depends(get_db),
) -> dict:
    """Return id, status, task_id, page_count and error_msg for one document (404 if unknown)."""
    query = "SELECT id, status, task_id, page_count, error_msg FROM documents WHERE id=?"
    record = db.execute(query, [doc_id]).fetchone()
    if record is None:
        raise HTTPException(status_code=404, detail="Document not found")
    return dict(record)

View file

@ -1,67 +0,0 @@
# app/api/search.py
"""
BM25 keyword search across the document library.
MIT — no tier gate. No Ollama required.
"""
from __future__ import annotations
import logging
import os
from typing import Annotated
from fastapi import APIRouter, Depends
from pydantic import BaseModel, Field
from app.services.bm25_index import BM25Index
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/search", tags=["search"])
class SearchRequest(BaseModel):
    """Request body accepted by the keyword search endpoint."""

    # Free-text query.
    query: str
    # Maximum number of hits to return; validated to the range 1..50.
    top_k: int = Field(default=10, ge=1, le=50)
    # Optional list of document ids forwarded to the BM25 index.
    doc_ids: list[str] | None = None
class SearchResult(BaseModel):
    """One keyword-search hit."""

    chunk_id: str
    doc_id: str
    page_number: int
    # First 300 characters of the page text.
    text_snippet: str
    bm25_score: float
def _get_bm25() -> BM25Index:
    """Dependency provider: return the ``_bm25`` object held by ``app.main``.

    Raises ``RuntimeError`` when ``app.main`` has no such attribute (or it is None).
    """
    import app.main as main_module

    index = getattr(main_module, "_bm25", None)
    if index is not None:
        return index
    raise RuntimeError("BM25 index not initialised — app.main not loaded")
def _get_db_path() -> str:
"""Read lazily so test fixtures (monkeypatch.setattr) take effect."""
import pathlib
data_dir = pathlib.Path(os.environ.get("PAGEPIPER_DATA_DIR", "data"))
return str(data_dir / "pagepiper.db")
@router.post("")
def search(
    req: SearchRequest,
    bm25: Annotated[BM25Index, Depends(_get_bm25)],
) -> list[SearchResult]:
    """Run a BM25 keyword query and return the hits with 300-character snippets."""
    # Rebuild the index first if it has been flagged dirty.
    bm25.ensure_fresh(_get_db_path())

    results: list[SearchResult] = []
    for hit in bm25.query(req.query, top_k=req.top_k, doc_ids=req.doc_ids):
        snippet = (hit.text or "")[:300]
        results.append(
            SearchResult(
                chunk_id=hit.chunk_id,
                doc_id=hit.doc_id,
                page_number=hit.page_number,
                text_snippet=snippet,
                bm25_score=hit.score,
            )
        )
    return results

View file

@ -1,19 +0,0 @@
# app/deps.py
"""FastAPI dependency providers."""
from __future__ import annotations
import sqlite3
from typing import Generator
from app.config import DB_PATH
def get_db() -> Generator[sqlite3.Connection, None, None]:
    """Yield a SQLite connection to ``DB_PATH`` and close it when the request ends.

    The connection has foreign keys enforced, WAL journaling requested, and
    rows returned as ``sqlite3.Row``.
    """
    # NOTE(review): FastAPI executes sync generator dependencies and sync
    # endpoints in a thread pool, so setup, use and teardown are not guaranteed
    # to share a thread. sqlite3 rejects cross-thread use unless
    # check_same_thread is disabled. Each request gets its own connection, so
    # it is still never used by two threads at once.
    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    try:
        # Inside the try so a failing PRAGMA cannot leak the connection.
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute("PRAGMA journal_mode = WAL")
        conn.row_factory = sqlite3.Row
        yield conn
    finally:
        conn.close()

View file

@ -1,46 +0,0 @@
# app/main.py
"""FastAPI application factory for pagepiper."""
from __future__ import annotations
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.config import DB_PATH
from app.services.bm25_index import BM25Index
logger = logging.getLogger("pagepiper")
# Module-level BM25 singleton — shared across all requests
_bm25 = BM25Index()
def _apply_migrations() -> None:
    """Run the database migrations against ``DB_PATH``."""
    # Imported at call time rather than at module import, as before.
    from scripts import db_migrate

    db_migrate.migrate(DB_PATH)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: migrate the database and flag the BM25 index dirty at startup."""
    _apply_migrations()
    # Flagging (not rebuilding) defers the rebuild to the first search.
    _bm25.mark_dirty()
    yield
app = FastAPI(title="Pagepiper", lifespan=lifespan)
# Wire BM25 dirty callback into library router
from app.api import library as _lib_module # noqa: E402
_lib_module._mark_bm25_dirty = _bm25.mark_dirty
# Register routers
from app.api.library import router as library_router # noqa: E402
from app.api.ingest import router as ingest_router # noqa: E402
from app.api.search import router as search_router # noqa: E402
from app.api.chat import router as chat_router # noqa: E402
app.include_router(library_router)
app.include_router(ingest_router)
app.include_router(search_router)
app.include_router(chat_router)

View file

@ -1,102 +0,0 @@
"""
BM25 keyword search over the page_chunks corpus.
MIT — no tier gate. Available to all users; no Ollama required.
"""
from __future__ import annotations
import logging
import sqlite3
from dataclasses import dataclass
from rank_bm25 import BM25Okapi
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class BM25Result:
    """A single BM25 search result."""

    chunk_id: str
    doc_id: str
    page_number: int
    text: str
    score: float


class BM25Index:
    """
    In-memory BM25 index over page_chunks. Rebuilt lazily on demand.

    Thread-safety note: rebuilt synchronously in the request thread. For
    single-user local deployments this is acceptable.
    """

    def __init__(self) -> None:
        self._index: BM25Okapi | None = None
        self._chunks: list[dict] = []
        self._dirty: bool = True

    def mark_dirty(self) -> None:
        """Signal that the index needs rebuilding (call after any ingest completes)."""
        self._dirty = True

    def ensure_fresh(self, db_path: str) -> None:
        """Rebuild from SQLite if dirty.

        A SQLite error is logged and leaves the index dirty so the next call
        retries; any other error also leaves it dirty and propagates.
        """
        if not self._dirty:
            return
        # Clear the flag *before* reading: a mark_dirty() that arrives while
        # the rebuild is running must survive it, not be overwritten afterwards.
        self._dirty = False
        try:
            conn = sqlite3.connect(db_path)
            conn.row_factory = sqlite3.Row
            try:
                rows = conn.execute(
                    "SELECT id, doc_id, page_number, text FROM page_chunks ORDER BY doc_id, page_number"
                ).fetchall()
            finally:
                conn.close()
            self._load_chunks([dict(r) for r in rows])
        except sqlite3.Error as exc:
            self._dirty = True
            logger.error("BM25 index rebuild failed: %s", exc)
            return
        except Exception:
            self._dirty = True
            raise
        logger.info("BM25 index rebuilt: %d chunks", len(self._chunks))

    def _load_chunks(self, chunks: list[dict]) -> None:
        """Replace the corpus with ``chunks`` and rebuild the BM25 model over it."""
        self._chunks = chunks
        # A chunk's text may be NULL in the database; treat it as empty.
        tokenized = [(c["text"] or "").lower().split() for c in chunks]
        # Only build a model when at least one document has a token; a corpus
        # with no terms at all has nothing to score.
        self._index = BM25Okapi(tokenized) if any(tokenized) else None

    def query(
        self,
        query_text: str,
        top_k: int = 10,
        doc_ids: list[str] | None = None,
    ) -> list[BM25Result]:
        """Search the corpus. Returns results sorted by descending BM25 score.

        Args:
            query_text: Free-text query; lower-cased and split on whitespace.
            top_k: Maximum number of results; values <= 0 yield no results.
            doc_ids: When not None, only chunks whose doc_id is listed are
                returned (an empty list therefore matches nothing).

        Only chunks with a strictly positive score are returned.
        """
        if top_k <= 0 or not self._index or not self._chunks:
            return []
        # Set membership instead of a list scan per candidate chunk.
        allowed = set(doc_ids) if doc_ids is not None else None
        scores = self._index.get_scores(query_text.lower().split())
        ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
        results: list[BM25Result] = []
        for i, score in ranked:
            if score <= 0:
                continue
            c = self._chunks[i]
            if allowed is not None and c["doc_id"] not in allowed:
                continue
            results.append(
                BM25Result(
                    chunk_id=c["id"],
                    doc_id=c["doc_id"],
                    page_number=c["page_number"],
                    text=c["text"],
                    score=float(score),
                )
            )
            if len(results) >= top_k:
                break
        return results

View file

@ -1,123 +0,0 @@
# app/services/retriever.py
"""
Hybrid BM25 + semantic retriever.
BSL 1.1 — semantic path requires PAGEPIPER_OLLAMA_URL (BYOK gate).
BM25-only path is MIT and has no gate.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from app.services.bm25_index import BM25Index
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class RetrievedChunk:
    """A chunk returned by the retriever, with source scores."""

    chunk_id: str
    doc_id: str
    page_number: int
    text: str
    bm25_score: float
    # None when the chunk was not returned by the vector search.
    vector_score: float | None


class Retriever:
    """Combines BM25 keyword hits with vector-store hits."""

    def __init__(self, bm25: BM25Index) -> None:
        self._bm25 = bm25

    def hybrid_search(
        self,
        query: str,
        top_k: int,
        doc_ids: list[str] | None,
        db_path: str,
        vec_db_path: str,
        llm,  # LLMRouter | None — caller must pass
    ) -> list[RetrievedChunk]:
        """
        Merge BM25 and semantic results.

        Falls back to BM25-only if llm is None. Otherwise both sources are
        asked for ``top_k * 2`` candidates, merged by chunk id, ranked by the
        combined score and cut to ``top_k``.
        """
        if llm is None:
            return self._bm25_only(query, top_k, doc_ids, db_path)

        from circuitforge_core.vector.sqlite_vec import LocalSQLiteVecStore

        self._bm25.ensure_fresh(db_path)
        bm25_hits = self._bm25.query(query, top_k=top_k * 2, doc_ids=doc_ids)

        vec = llm.embed([query])[0]
        store = LocalSQLiteVecStore(db_path=vec_db_path, table="page_vecs", dimensions=768)
        # A metadata filter is passed to the store only for exactly one doc id;
        # several ids are filtered after the query instead.
        filter_meta = {"doc_id": doc_ids[0]} if doc_ids and len(doc_ids) == 1 else None
        vec_hits = store.query(vec, top_k=top_k * 2, filter_metadata=filter_meta)
        if doc_ids and len(doc_ids) > 1:
            allowed = set(doc_ids)
            vec_hits = [h for h in vec_hits if h.metadata.get("doc_id") in allowed]

        merged = self._merge_hits(bm25_hits, vec_hits)
        ranked = sorted(merged.values(), key=self._combined_score, reverse=True)
        return ranked[:top_k]

    def _merge_hits(self, bm25_hits, vec_hits) -> dict[str, RetrievedChunk]:
        """Merge both hit lists by chunk id; BM25 hits first, vector-only hits appended."""
        merged: dict[str, RetrievedChunk] = {}
        for r in bm25_hits:
            merged[r.chunk_id] = RetrievedChunk(
                chunk_id=r.chunk_id,
                doc_id=r.doc_id,
                page_number=r.page_number,
                text=r.text,
                bm25_score=r.score,
                vector_score=None,
            )

        # Text for vector-only hits comes from the BM25 corpus. Build the
        # id -> text map once, and only if it is actually needed, instead of
        # scanning the whole corpus for every vector hit.
        text_by_id: dict | None = None
        for vh in vec_hits:
            existing = merged.get(vh.id)
            if existing is not None:
                merged[vh.id] = RetrievedChunk(
                    chunk_id=existing.chunk_id,
                    doc_id=existing.doc_id,
                    page_number=existing.page_number,
                    text=existing.text,
                    bm25_score=existing.bm25_score,
                    vector_score=vh.score,
                )
                continue
            if text_by_id is None:
                # _chunks is the loaded list of dicts from BM25Index; no public
                # accessor exists. Reversed so the first occurrence of an id wins.
                text_by_id = {c["id"]: c["text"] for c in reversed(self._bm25._chunks)}
            merged[vh.id] = RetrievedChunk(
                chunk_id=vh.id,
                doc_id=vh.metadata.get("doc_id", ""),
                page_number=int(vh.metadata.get("page_number", 0)),
                text=text_by_id.get(vh.id, ""),
                bm25_score=0.0,
                vector_score=vh.score,
            )
        return merged

    @staticmethod
    def _combined_score(r: RetrievedChunk) -> float:
        """Equal-weight blend of the BM25 score and the inverted vector distance."""
        # sqlite-vec returns L2 distance (lower=better); invert to [0,1] higher-is-better
        vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0
        return r.bm25_score * 0.5 + vec * 0.5

    def _bm25_only(
        self, query: str, top_k: int, doc_ids: list[str] | None, db_path: str
    ) -> list[RetrievedChunk]:
        """Return the BM25 hits as RetrievedChunk objects with no vector score."""
        self._bm25.ensure_fresh(db_path)
        return [
            RetrievedChunk(
                chunk_id=r.chunk_id,
                doc_id=r.doc_id,
                page_number=r.page_number,
                text=r.text,
                bm25_score=r.score,
                vector_score=None,
            )
            for r in self._bm25.query(query, top_k=top_k, doc_ids=doc_ids)
        ]

View file

@ -1,60 +0,0 @@
# app/services/synthesizer.py
"""
LLM answer synthesis over retrieved chunks.
BSL 1.1 — requires LLMRouter (Ollama BYOK or cloud tier).
"""
from __future__ import annotations
from dataclasses import dataclass
from app.services.retriever import RetrievedChunk
_SYSTEM_PROMPT = (
    "You are a helpful document assistant. "
    "Answer the user's question using ONLY the provided document excerpts. "
    "For each claim, cite the source page as [p.N]. "
    "If the excerpts are insufficient, say so. Do not invent information."
)


@dataclass(frozen=True)
class Citation:
    """Source reference for one chunk that was given to the LLM."""

    doc_id: str
    page_number: int
    # First 200 characters of the chunk text.
    snippet: str
    bm25_score: float


@dataclass(frozen=True)
class SynthesisResult:
    """The LLM's answer plus one citation per chunk supplied as context."""

    answer: str
    citations: tuple[Citation, ...]


class Synthesizer:
    """Builds a prompt from retrieved chunks and asks the LLM for an answer."""

    # Only the most recent turns are forwarded, to keep the prompt bounded.
    _MAX_HISTORY_TURNS = 10

    def __init__(self, llm) -> None:  # LLMRouter
        self._llm = llm

    @classmethod
    def _format_history(cls, history: list[dict]) -> str:
        """Render recent non-empty turns as ``role: content`` lines ('' if none)."""
        lines = []
        for turn in history[-cls._MAX_HISTORY_TURNS:]:
            content = (turn.get("content") or "").strip()
            if content:
                lines.append(f"{turn.get('role') or 'user'}: {content}")
        return "\n".join(lines)

    def synthesize(
        self,
        message: str,
        history: list[dict],
        chunks: list[RetrievedChunk],
    ) -> SynthesisResult:
        """Ask the LLM to answer ``message`` from ``chunks``.

        Args:
            message: The current user question.
            history: Earlier turns as dicts with ``role`` and ``content`` keys.
                When non-empty they are included in the prompt so follow-up
                questions can be resolved; with no history the prompt is the
                excerpts followed by the question.
            chunks: Retrieved chunks; each contributes up to 500 characters of
                context and one citation.

        Returns:
            The LLM's answer and a citation for every chunk in ``chunks``.
        """
        context_parts = [f"[p.{c.page_number}]\n{c.text[:500]}" for c in chunks]
        context = "\n\n---\n\n".join(context_parts)

        sections = [f"Document excerpts:\n\n{context}"]
        conversation = self._format_history(history)
        if conversation:
            sections.append(f"Conversation so far:\n{conversation}")
        sections.append(f"Question: {message}")
        prompt = "\n\n".join(sections)

        answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
        citations = tuple(
            Citation(
                doc_id=c.doc_id,
                page_number=c.page_number,
                snippet=c.text[:200],
                bm25_score=c.bm25_score,
            )
            for c in chunks
        )
        return SynthesisResult(answer=answer, citations=citations)

View file

@ -1,51 +0,0 @@
# Pagepiper — cloud managed instance
# Project: pagepiper-cloud (docker compose -f compose.cloud.yml -p pagepiper-cloud ...)
# Web: http://127.0.0.1:8533 → pagepiper.circuitforge.tech (primary)
# → menagerie.circuitforge.tech/pagepiper (secondary)
# API: internal only on pagepiper-cloud-net (nginx proxies /api/ → api:8522)
services:
api:
build:
context: ..
dockerfile: pagepiper/Dockerfile
restart: unless-stopped
env_file: .env
environment:
CLOUD_MODE: "true"
PAGEPIPER_DATA_DIR: /devl/pagepiper-cloud-data
PAGEPIPER_BOOKS_DIR: /devl/pagepiper-cloud-data/books
# PAGEPIPER_OLLAMA_URL — set in .env (BYOK gate for hybrid search + RAG)
# HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN — set in .env for license validation
# cf-orch: route LLM inference through coordinator for managed GPU access
CF_ORCH_URL: http://host.docker.internal:7700
CF_APP_NAME: pagepiper
COORDINATOR_URL: http://10.1.10.71:7700
COORDINATOR_PAGEPIPER_KEY: ${COORDINATOR_PAGEPIPER_KEY:-}
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- /devl/pagepiper-cloud-data:/devl/pagepiper-cloud-data
- ${HOME}/.config/circuitforge:/root/.config/circuitforge:ro
networks:
- pagepiper-cloud-net
web:
build:
context: .
dockerfile: docker/web/Dockerfile
args:
VITE_BASE_URL: /pagepiper
VITE_API_BASE: /pagepiper
NGINX_CONF: docker/web/nginx.cloud.conf
restart: unless-stopped
ports:
- "8533:80"
networks:
- pagepiper-cloud-net
depends_on:
- api
networks:
pagepiper-cloud-net:
driver: bridge

View file

@ -14,7 +14,6 @@ RUN npm run build
# Stage 2: serve via nginx # Stage 2: serve via nginx
FROM nginx:alpine FROM nginx:alpine
ARG NGINX_CONF=docker/web/nginx.conf COPY docker/web/nginx.conf /etc/nginx/conf.d/default.conf
COPY ${NGINX_CONF} /etc/nginx/conf.d/default.conf
COPY --from=build /app/dist /usr/share/nginx/html COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80 EXPOSE 80

View file

@ -1,59 +0,0 @@
server {
listen 80;
server_name _;
root /usr/share/nginx/html;
index index.html;
# API requests when accessed via Caddy (prefix already stripped by handle_path)
location /api/ {
proxy_pass http://api:8522;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $http_x_real_ip;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
proxy_set_header X-CF-Session $http_x_cf_session;
client_max_body_size 50m;
# PDF uploads and LLM inference can be slow
proxy_read_timeout 300s;
proxy_send_timeout 300s;
}
# API requests when accessed directly via pagepiper.circuitforge.tech
# VITE_API_BASE=/pagepiper means frontend builds calls as /pagepiper/api/...
# Caddy passes these unchanged; nginx strips /pagepiper prefix here.
location /pagepiper/api/ {
rewrite ^/pagepiper(/api/.*)$ $1 break;
proxy_pass http://api:8522;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $http_x_real_ip;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
proxy_set_header X-CF-Session $http_x_cf_session;
client_max_body_size 50m;
proxy_read_timeout 300s;
proxy_send_timeout 300s;
}
# Static assets at the /pagepiper/ base — used when Caddy does NOT strip the prefix
# (i.e., pagepiper.circuitforge.tech routes, where /pagepiper is the Vite base URL).
# ^~ prevents regex asset location from matching first.
location ^~ /pagepiper/ {
alias /usr/share/nginx/html/;
try_files $uri $uri/ /index.html;
}
location = /index.html {
add_header Cache-Control "no-cache, no-store, must-revalidate";
try_files $uri /index.html;
}
location / {
try_files $uri $uri/ /index.html;
}
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
}

View file

@ -3,17 +3,13 @@ set -euo pipefail
SERVICE=pagepiper SERVICE=pagepiper
WEB_PORT=8521 WEB_PORT=8521
CLOUD_WEB_PORT=8533
COMPOSE_FILE="compose.yml" COMPOSE_FILE="compose.yml"
COMPOSE_CLOUD_FILE="compose.cloud.yml"
CLOUD_PROJECT="pagepiper-cloud"
OVERRIDE_ARGS=() OVERRIDE_ARGS=()
[[ -f "compose.override.yml" ]] && OVERRIDE_ARGS=(-f compose.override.yml) [[ -f "compose.override.yml" ]] && OVERRIDE_ARGS=(-f compose.override.yml)
usage() { usage() {
echo "Usage: $0 {start|stop|restart|status|logs [svc]|open|build|test" echo "Usage: $0 {start|stop|restart|status|logs [svc]|open|build|test}"
echo " |cloud:start|cloud:stop|cloud:restart|cloud:status|cloud:logs [svc]|cloud:build}"
exit 1 exit 1
} }
@ -48,27 +44,6 @@ case "$cmd" in
test) test)
conda run -n cf pytest tests/ -v conda run -n cf pytest tests/ -v
;; ;;
cloud:start)
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" up -d --build
echo "Pagepiper cloud running → http://localhost:${CLOUD_WEB_PORT}"
;;
cloud:stop)
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" down
;;
cloud:restart)
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" down
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" up -d --build
echo "Pagepiper cloud running → http://localhost:${CLOUD_WEB_PORT}"
;;
cloud:status)
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" ps
;;
cloud:logs)
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" logs -f "${1:-}"
;;
cloud:build)
docker compose -f "$COMPOSE_CLOUD_FILE" -p "$CLOUD_PROJECT" build --no-cache
;;
*) *)
usage usage
;; ;;

View file

@ -1,154 +0,0 @@
# scripts/ingest_pdf.py
"""
cf-orch task: pagepiper/ingest_pdf
Extracts text from a PDF, stores page chunks in SQLite, and (if Ollama is
configured) generates embeddings and stores them in the sqlite-vec store.
Entry point:
python scripts/ingest_pdf.py --doc-id X --file-path Y --db-path Z --vec-db-path W
"""
from __future__ import annotations
import logging
import os
import sqlite3
from pathlib import Path
logger = logging.getLogger("pagepiper.ingest")
# Pages to embed per Ollama API call — avoids hitting request size limits on large PDFs
EMBED_BATCH_SIZE = 64
def _update_status(
conn: sqlite3.Connection,
doc_id: str,
status: str,
page_count: int | None = None,
error_msg: str | None = None,
) -> None:
if page_count is not None:
conn.execute(
"UPDATE documents SET status=?, page_count=?, updated_at=datetime('now') WHERE id=?",
[status, page_count, doc_id],
)
elif error_msg is not None:
conn.execute(
"UPDATE documents SET status=?, error_msg=?, updated_at=datetime('now') WHERE id=?",
[status, error_msg, doc_id],
)
else:
conn.execute(
"UPDATE documents SET status=?, updated_at=datetime('now') WHERE id=?",
[status, doc_id],
)
conn.commit()
def _store_page_chunks(
    conn: sqlite3.Connection, doc_id: str, chunks: list
) -> list[tuple[str, int, str]]:
    """Replace the document's page_chunks rows; return (chunk_id, page_number, text) per row."""
    conn.execute("DELETE FROM page_chunks WHERE doc_id=?", [doc_id])
    chunk_rows: list[tuple[str, int, str]] = []
    for chunk in chunks:
        row = conn.execute(
            """INSERT INTO page_chunks(doc_id, page_number, text, source, word_count)
            VALUES (?,?,?,?,?) RETURNING id""",
            [doc_id, chunk.page_number, chunk.text, chunk.source, chunk.word_count],
        ).fetchone()
        chunk_rows.append((row[0], chunk.page_number, chunk.text))
    conn.commit()
    return chunk_rows


def _build_embedding_router(ollama_url: str):
    """Create an LLMRouter that talks to Ollama's OpenAI-compatible endpoint."""
    from circuitforge_core.llm import LLMRouter

    _clean = ollama_url.rstrip("/")
    # Append /v1 unless the configured URL already ends with it.
    base_url = _clean if _clean.endswith("/v1") else _clean + "/v1"
    return LLMRouter({
        "fallback_order": ["ollama"],
        "backends": {
            "ollama": {
                "type": "openai_compat",
                "base_url": base_url,
                "model": os.environ.get("PAGEPIPER_CHAT_MODEL", "mistral:7b"),
                "embedding_model": os.environ.get(
                    "PAGEPIPER_EMBED_MODEL", "nomic-embed-text"
                ),
                "supports_images": False,
            }
        },
    })


def _embed_and_store(
    doc_id: str,
    chunk_rows: list[tuple[str, int, str]],
    ollama_url: str,
    vec_db_path: str,
) -> int:
    """Embed every chunk text in batches and store the vectors; return how many were stored."""
    from circuitforge_core.vector.sqlite_vec import LocalSQLiteVecStore

    router = _build_embedding_router(ollama_url)
    vec_store = LocalSQLiteVecStore(
        db_path=vec_db_path, table="page_vecs", dimensions=768
    )
    # Remove old vectors before re-inserting. If embedding fails mid-way,
    # old vectors are gone but new ones are partial — re-ingest recovers.
    vec_store.delete_where({"doc_id": doc_id})

    texts = [text for _, _, text in chunk_rows]
    vectors: list[list[float]] = []
    for start in range(0, len(texts), EMBED_BATCH_SIZE):
        vectors.extend(router.embed(texts[start : start + EMBED_BATCH_SIZE]))

    # zip() would silently drop pages if the backend returned too few vectors.
    if len(vectors) != len(chunk_rows):
        raise RuntimeError(
            f"Embedding backend returned {len(vectors)} vectors for {len(chunk_rows)} pages"
        )

    for (chunk_id, page_number, _), vector in zip(chunk_rows, vectors):
        vec_store.upsert(
            id=chunk_id,
            vector=vector,
            metadata={"doc_id": doc_id, "page_number": page_number},
        )
    return len(vectors)


def run(doc_id: str, file_path: str, db_path: str, vec_db_path: str) -> None:
    """Run the full ingest pipeline for one PDF. Called by cf-orch or BackgroundTasks.

    Steps: mark the document ``processing``, extract page chunks, replace the
    document's rows in ``page_chunks``, embed and store vectors when
    ``PAGEPIPER_OLLAMA_URL`` is set, then mark the document ``ready``.

    Raises:
        Exception: any failure is logged, recorded on the document row as
            status ``error`` (best effort) and re-raised.
    """
    from circuitforge_core.documents.pdf import PDFExtractor

    conn: sqlite3.Connection | None = None
    try:
        conn = sqlite3.connect(db_path)
        conn.execute("PRAGMA foreign_keys = ON")
        _update_status(conn, doc_id, "processing")

        # Step 1: Extract page chunks
        logger.info("Extracting text from %s", file_path)
        extractor = PDFExtractor(ocr_min_words=10)
        chunks = extractor.chunk_pages(file_path)
        logger.info("Extracted %d pages", len(chunks))

        # Step 2: Store chunks (replace any existing for this doc)
        chunk_rows = _store_page_chunks(conn, doc_id, chunks)

        # Step 3: Embed and store vectors if Ollama is configured (BYOK gate)
        ollama_url = os.environ.get("PAGEPIPER_OLLAMA_URL", "").strip()
        if ollama_url and chunks:
            logger.info("Embedding %d pages via Ollama at %s", len(chunks), ollama_url)
            stored = _embed_and_store(doc_id, chunk_rows, ollama_url, vec_db_path)
            logger.info("Stored %d embeddings", stored)

        _update_status(conn, doc_id, "ready", page_count=len(chunks))
        logger.info("Ingest complete for doc %s (%d pages)", doc_id, len(chunks))
    except Exception as exc:
        logger.exception("Ingest failed for doc %s: %s", doc_id, exc)
        if conn is not None:
            try:
                # Discard any half-finished chunk replacement first; otherwise
                # the commit inside _update_status would persist it.
                conn.rollback()
                _update_status(conn, doc_id, "error", error_msg=str(exc))
            except Exception:
                logger.warning("Could not write error status for doc %s", doc_id)
        raise
    finally:
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    # Command-line entry point: every argument is required and is passed
    # straight through to run().
    import argparse

    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(
        description="Ingest a PDF (cf-orch task entry point)"
    )
    for flag in ("--doc-id", "--file-path", "--db-path", "--vec-db-path"):
        cli.add_argument(flag, required=True)
    opts = cli.parse_args()

    run(
        doc_id=opts.doc_id,
        file_path=opts.file_path,
        db_path=opts.db_path,
        vec_db_path=opts.vec_db_path,
    )

View file

@ -1,48 +0,0 @@
# tests/conftest.py
"""Shared fixtures for pagepiper test suite."""
from __future__ import annotations
import sqlite3
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def test_db(tmp_path) -> str:
    """Create a temporary SQLite database with the initial schema; return its path."""
    db_path = str(tmp_path / "test.db")
    # Resolve the schema relative to this file so the suite does not depend on
    # the directory pytest is launched from.
    # NOTE(review): assumes this conftest lives in <repo>/tests/ next to
    # <repo>/migrations/ — confirm against the repository layout.
    schema_file = (
        Path(__file__).resolve().parent.parent / "migrations" / "001_initial_schema.sql"
    )
    schema = schema_file.read_text(encoding="utf-8")
    conn = sqlite3.connect(db_path)
    try:
        conn.executescript(schema)
        conn.commit()
    finally:
        # Close even when the schema script fails.
        conn.close()
    return db_path
@pytest.fixture
def client(test_db, tmp_path, monkeypatch):
    """Yield a TestClient whose ``get_db`` dependency points at the temporary test database."""
    books_dir = tmp_path / "books"
    monkeypatch.setenv("PAGEPIPER_DATA_DIR", str(tmp_path))
    monkeypatch.setenv("PAGEPIPER_WATCH_DIR", str(books_dir))
    books_dir.mkdir(exist_ok=True)

    import app.main as _main_module
    from app.main import app, _bm25
    from app.deps import get_db

    # Suppress migrations during tests — test_db fixture already applies the schema
    monkeypatch.setattr(_main_module, "_apply_migrations", lambda: None)

    def override_db():
        # Same shape as the real dependency, but against the temporary database.
        connection = sqlite3.connect(test_db)
        connection.execute("PRAGMA foreign_keys = ON")
        connection.row_factory = sqlite3.Row
        try:
            yield connection
        finally:
            connection.close()

    app.dependency_overrides[get_db] = override_db
    # Clear any index state left by previous tests.
    _bm25.mark_dirty()
    yield TestClient(app)
    app.dependency_overrides.clear()

View file

@ -1,96 +0,0 @@
"""Tests for app.services.bm25_index."""
from __future__ import annotations
import pytest
from app.services.bm25_index import BM25Index, BM25Result
def _seeded_index() -> BM25Index:
    """Return a BM25Index preloaded with three chunks across two documents."""
    seed_rows = (
        ("c1", "book-a", 1, "Fireball deals 8d6 fire damage on a failed Dexterity saving throw."),
        ("c2", "book-a", 2, "A wizard can cast one spell per turn unless they have Action Surge."),
        (
            "c3",
            "book-b",
            5,
            "Grapple rules apply when the attacker uses the Attack action to grab a target.",
        ),
    )
    corpus = [
        {"id": cid, "doc_id": doc, "page_number": page, "text": body}
        for cid, doc, page, body in seed_rows
    ]
    index = BM25Index()
    index._load_chunks(corpus)
    return index
def test_query_returns_relevant_result():
    """The Fireball chunk ranks first, with a positive score, for a fire-damage query."""
    hits = _seeded_index().query("fireball fire damage")
    assert len(hits) >= 1
    best = hits[0]
    assert best.chunk_id == "c1"
    assert best.score > 0
def test_query_respects_top_k():
    """top_k must cap the result list below the number of matching chunks."""
    idx = _seeded_index()
    # "action" appears in two of the seeded chunks (c2 and c3), so the cap is
    # only exercised by a top_k smaller than two.
    uncapped = idx.query("action")
    assert len(uncapped) == 2
    capped = idx.query("action", top_k=1)
    assert len(capped) == 1
    assert capped[0].chunk_id == uncapped[0].chunk_id
def test_query_filters_by_doc_id():
    """Restricting ``doc_ids`` must only ever yield chunks from those documents."""
    idx = _seeded_index()
    results = idx.query("rules", doc_ids=["book-b"])
    # "rules" appears in the book-b chunk, so an empty list would mean the
    # filter dropped everything and the all() check below passed vacuously.
    assert len(results) >= 1, "expected at least one 'rules' result from book-b"
    assert all(r.doc_id == "book-b" for r in results)
def test_query_empty_corpus_returns_empty():
    """Querying an index loaded with zero chunks yields an empty list."""
    empty_index = BM25Index()
    empty_index._load_chunks([])
    assert empty_index.query("anything") == []
def test_mark_dirty_triggers_rebuild(tmp_path):
    """After ``mark_dirty``, ``ensure_fresh`` loads chunks from the given database."""
    import sqlite3

    db_path = str(tmp_path / "test.db")
    setup_statements = (
        "CREATE TABLE page_chunks(id TEXT, doc_id TEXT, page_number INT, text TEXT)",
        "INSERT INTO page_chunks VALUES ('x1','doc-1',1,'Ranger favored enemy favored terrain terrain bonuses bonuses action attack')",
        "INSERT INTO page_chunks VALUES ('x2','doc-1',2,'Wizard can cast spells and perform actions')",
        "INSERT INTO page_chunks VALUES ('x3','doc-1',3,'Fighter attacks and deals damage with weapon')",
    )
    db = sqlite3.connect(db_path)
    for statement in setup_statements:
        db.execute(statement)
    db.commit()
    db.close()

    index = BM25Index()
    index.mark_dirty()
    index.ensure_fresh(db_path)

    hits = index.query("ranger terrain")
    assert len(hits) >= 1
    assert hits[0].chunk_id == "x1"
def test_bm25_result_is_frozen():
    """Assigning to a field of an existing BM25Result must raise."""
    frozen = BM25Result(
        chunk_id="x",
        doc_id="d",
        page_number=1,
        text="hello",
        score=0.5,
    )
    with pytest.raises(Exception):
        frozen.score = 1.0  # type: ignore[misc]

View file

@ -1,59 +0,0 @@
# tests/test_chat_api.py
"""Tests for POST /api/chat — RAG chat (BSL, BYOK gate)."""
from __future__ import annotations
import sqlite3
from unittest.mock import MagicMock, patch
from app.services.retriever import RetrievedChunk
def test_chat_returns_402_without_ollama(client, monkeypatch):
    """With PAGEPIPER_OLLAMA_URL unset, POST /api/chat answers 402 and mentions Ollama."""
    monkeypatch.delenv("PAGEPIPER_OLLAMA_URL", raising=False)
    payload = {"message": "How does Fireball work?", "history": []}
    response = client.post("/api/chat", json=payload)
    assert response.status_code == 402
    error_body = response.json()
    assert "detail" in error_body
    assert "Ollama" in error_body["detail"]["message"]
def test_chat_returns_answer_with_mocked_ollama(client, test_db, monkeypatch):
    """With retrieval and the LLM router patched, the endpoint returns answer + citations."""
    monkeypatch.setenv("PAGEPIPER_OLLAMA_URL", "http://localhost:11434")

    # Seed one ready document with a single page chunk.
    db = sqlite3.connect(test_db)
    db.execute(
        "INSERT OR IGNORE INTO documents(id, title, file_path, status) VALUES ('b1','PHB','phb.pdf','ready')"
    )
    db.execute(
        "INSERT INTO page_chunks(doc_id, page_number, text, source, word_count) "
        "VALUES ('b1',15,'Fireball deals 8d6 fire damage.','text_layer',6)"
    )
    db.commit()
    db.close()

    fake_llm = MagicMock()
    fake_llm.complete.return_value = "Fireball deals 8d6 fire damage [p.15]."
    retrieved = [
        RetrievedChunk(
            chunk_id="c1",
            doc_id="b1",
            page_number=15,
            text="Fireball deals 8d6 fire damage.",
            bm25_score=1.0,
            vector_score=None,
        )
    ]

    search_patch = patch("app.api.chat.Retriever.hybrid_search", return_value=retrieved)
    router_patch = patch("app.api.chat._get_llm_router", return_value=fake_llm)
    request_body = {"message": "How does Fireball work?", "history": [], "doc_ids": ["b1"]}
    with search_patch, router_patch:
        response = client.post("/api/chat", json=request_body)

    assert response.status_code == 200
    payload = response.json()
    assert "answer" in payload
    assert "citations" in payload
    assert "Fireball" in payload["answer"]

View file

@ -1,138 +0,0 @@
# tests/test_ingest.py
"""Unit tests for scripts/ingest_pdf.py."""
from __future__ import annotations
import sqlite3
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from scripts.ingest_pdf import run
@pytest.fixture
def ingest_db(tmp_path) -> tuple[str, str]:
    """Create a schema-initialised database holding one pending document ``d1``.

    Returns ``(db_path, vec_db_path)``. The vector DB path is only a location
    under ``tmp_path``; this fixture does not create that file.
    """
    main_db = str(tmp_path / "test.db")
    vector_db = str(tmp_path / "vecs.db")
    ddl = Path("migrations/001_initial_schema.sql").read_text()

    db = sqlite3.connect(main_db)
    db.executescript(ddl)
    db.execute(
        "INSERT INTO documents(id, title, file_path, status) VALUES ('d1','Test','test.pdf','pending')"
    )
    db.commit()
    db.close()
    return main_db, vector_db
def _make_mock_chunk(page_number: int = 1, text: str = "Some page text about rules.") -> MagicMock:
chunk = MagicMock()
chunk.page_number = page_number
chunk.text = text
chunk.source = "text_layer"
chunk.word_count = len(text.split())
return chunk
def test_ingest_sets_status_ready_on_success(ingest_db):
    """A successful one-chunk ingest marks the document ready with page_count 1."""
    db_path, vec_db_path = ingest_db
    extractor = MagicMock()
    extractor.chunk_pages.return_value = [_make_mock_chunk()]

    with patch("circuitforge_core.documents.pdf.PDFExtractor", return_value=extractor):
        run(doc_id="d1", file_path="test.pdf", db_path=db_path, vec_db_path=vec_db_path)

    db = sqlite3.connect(db_path)
    status, page_count = db.execute(
        "SELECT status, page_count FROM documents WHERE id='d1'"
    ).fetchone()
    db.close()
    assert status == "ready"
    assert page_count == 1
def test_ingest_stores_page_chunks(ingest_db):
    """Every chunk produced by the extractor is written to page_chunks."""
    db_path, vec_db_path = ingest_db
    extractor = MagicMock()
    extractor.chunk_pages.return_value = [
        _make_mock_chunk(page_number=page, text=f"Page {page} text content.")
        for page in (1, 2, 3)
    ]

    with patch("circuitforge_core.documents.pdf.PDFExtractor", return_value=extractor):
        run(doc_id="d1", file_path="test.pdf", db_path=db_path, vec_db_path=vec_db_path)

    db = sqlite3.connect(db_path)
    stored = db.execute(
        "SELECT page_number, text FROM page_chunks WHERE doc_id='d1' ORDER BY page_number"
    ).fetchall()
    db.close()

    assert len(stored) == 3
    first_page, first_text = stored[0]
    assert first_page == 1
    assert "Page 1" in first_text
def test_ingest_sets_error_status_on_failure(ingest_db):
    """An extractor failure propagates and is recorded on the document row.

    Uses the module-level ``run`` import; the function-local re-import the
    test used to carry was redundant.
    """
    db_path, vec_db_path = ingest_db
    with patch(
        "circuitforge_core.documents.pdf.PDFExtractor",
        side_effect=RuntimeError("PDF corrupt"),
    ):
        with pytest.raises(RuntimeError):
            run(doc_id="d1", file_path="bad.pdf", db_path=db_path, vec_db_path=vec_db_path)

    conn = sqlite3.connect(db_path)
    row = conn.execute("SELECT status, error_msg FROM documents WHERE id='d1'").fetchone()
    conn.close()
    assert row[0] == "error"
    assert "PDF corrupt" in row[1]
def test_ingest_skips_embeddings_without_ollama_url(ingest_db, monkeypatch):
    """When PAGEPIPER_OLLAMA_URL is unset, no vec DB file should be created."""
    db_path, vec_db_path = ingest_db
    monkeypatch.delenv("PAGEPIPER_OLLAMA_URL", raising=False)
    extractor = MagicMock()
    extractor.chunk_pages.return_value = [_make_mock_chunk()]

    with patch("circuitforge_core.documents.pdf.PDFExtractor", return_value=extractor):
        run(doc_id="d1", file_path="test.pdf", db_path=db_path, vec_db_path=vec_db_path)

    # Nothing asked for embeddings, so the vector DB file must not exist.
    assert not Path(vec_db_path).exists(), "vec DB should not be created without OLLAMA_URL"

    # The document itself is still ingested: ready, with its chunk stored.
    db = sqlite3.connect(db_path)
    (status,) = db.execute("SELECT status FROM documents WHERE id='d1'").fetchone()
    (chunk_count,) = db.execute(
        "SELECT COUNT(*) FROM page_chunks WHERE doc_id='d1'"
    ).fetchone()
    db.close()
    assert status == "ready"
    assert chunk_count == 1
def test_ingest_replaces_existing_chunks_on_reingest(ingest_db):
    """Re-running ingest for the same doc_id replaces old page_chunks."""
    db_path, vec_db_path = ingest_db
    extractor = MagicMock()
    extractor_patch_target = "circuitforge_core.documents.pdf.PDFExtractor"

    # First pass: three pages.
    extractor.chunk_pages.return_value = [
        _make_mock_chunk(page_number=page, text=f"Original page {page}.")
        for page in (1, 2, 3)
    ]
    with patch(extractor_patch_target, return_value=extractor):
        run(doc_id="d1", file_path="test.pdf", db_path=db_path, vec_db_path=vec_db_path)

    # Second pass: a single page, simulating a re-ingest after the file changed.
    extractor.chunk_pages.return_value = [_make_mock_chunk(text="Updated single page.")]
    with patch(extractor_patch_target, return_value=extractor):
        run(doc_id="d1", file_path="test.pdf", db_path=db_path, vec_db_path=vec_db_path)

    db = sqlite3.connect(db_path)
    remaining = db.execute(
        "SELECT text FROM page_chunks WHERE doc_id='d1'"
    ).fetchall()
    db.close()
    assert len(remaining) == 1
    assert "Updated" in remaining[0][0]

View file

@ -1,68 +0,0 @@
# tests/test_library_api.py
"""Tests for GET/POST /api/library endpoints."""
from __future__ import annotations
import sqlite3
def _add_doc(db_path: str, title: str, path: str, status: str = "ready") -> str:
conn = sqlite3.connect(db_path)
doc_id = conn.execute(
"INSERT INTO documents(title, file_path, status) VALUES (?,?,?) RETURNING id",
[title, path, status],
).fetchone()[0]
conn.commit()
conn.close()
return doc_id
def test_list_library_empty(client):
    """GET /api/library on a fresh database returns 200 and an empty list."""
    response = client.get("/api/library")
    assert response.status_code == 200
    assert response.json() == []
def test_list_library_returns_documents(client, test_db):
    """A document inserted into the database is listed with its title and a status."""
    _add_doc(test_db, "Player's Handbook", "/books/phb.pdf")
    response = client.get("/api/library")
    assert response.status_code == 200
    listing = response.json()
    assert len(listing) == 1
    only_doc = listing[0]
    assert only_doc["title"] == "Player's Handbook"
    assert "status" in only_doc
def test_delete_document_removes_record(client, test_db):
    """DELETE returns 204 and the document no longer appears in the listing."""
    doc_id = _add_doc(test_db, "Monster Manual", "/books/mm.pdf")
    delete_response = client.delete(f"/api/library/{doc_id}")
    assert delete_response.status_code == 204
    listing_after = client.get("/api/library").json()
    assert listing_after == []
def test_delete_nonexistent_returns_404(client):
    """Deleting an id that is not in the library yields 404."""
    response = client.delete("/api/library/does-not-exist")
    assert response.status_code == 404
def test_reingest_returns_task_id(client, test_db, tmp_path):
    """POST /api/library/{id}/reingest answers 202 with a ``task_id`` in the body."""
    from pathlib import Path

    pdf_path = str(tmp_path / "books" / "test.pdf")
    # write_bytes opens, writes and closes the file; the previous
    # open(...).write(...) left the handle for the garbage collector.
    Path(pdf_path).write_bytes(b"%PDF-1.4")
    doc_id = _add_doc(test_db, "Test Book", pdf_path)
    resp = client.post(f"/api/library/{doc_id}/reingest")
    assert resp.status_code == 202
    assert "task_id" in resp.json()
def test_reingest_updates_status_to_processing(client, test_db, tmp_path):
    """After a reingest request the document reports a post-queue status."""
    from pathlib import Path

    fixture_pdf = Path(str(tmp_path / "books" / "dm_guide.pdf"))
    fixture_pdf.write_bytes(b"%PDF-1.4 empty fixture")
    doc_id = _add_doc(test_db, "DM Guide", str(fixture_pdf))

    reingest_response = client.post(f"/api/library/{doc_id}/reingest")
    assert reingest_response.status_code == 202

    # The ingest may already have finished (or failed) by the time we look,
    # so any of these three states is accepted.
    current = client.get(f"/api/library/{doc_id}/status").json()
    assert current["status"] in ("processing", "error", "ready")

View file

@ -1,69 +0,0 @@
# tests/test_search_api.py
"""Tests for POST /api/search — BM25 keyword search (MIT, no tier gate)."""
from __future__ import annotations
import sqlite3
def _add_chunks(db_path: str, doc_id: str, chunks: list[dict]) -> None:
conn = sqlite3.connect(db_path)
conn.execute(
"INSERT OR IGNORE INTO documents(id, title, file_path, status) VALUES (?,'Book','p.pdf','ready')",
[doc_id],
)
for c in chunks:
conn.execute(
"INSERT INTO page_chunks(doc_id, page_number, text, source, word_count) VALUES (?,?,?,?,?)",
[doc_id, c["page_number"], c["text"], "text_layer", len(c["text"].split())],
)
conn.commit()
conn.close()
def test_search_returns_results(client, test_db, monkeypatch):
    """A keyword query returns the matching page first, with a positive BM25 score."""
    import app.api.search as _search_mod

    monkeypatch.setattr(_search_mod, "_get_db_path", lambda: test_db)

    # BM25Okapi IDF is 0 when df == N/2 (e.g. 2 docs, 1 match -> log(1.0) = 0),
    # so a third, unrelated chunk is included to keep relevant terms above zero.
    pages = [
        {"page_number": 1, "text": "Fireball deals 8d6 fire damage on a failed saving throw."},
        {"page_number": 2, "text": "Cure Wounds restores hit points to a living creature."},
        {"page_number": 3, "text": "Shield grants plus five to armor class until next turn."},
    ]
    _add_chunks(test_db, "book-a", pages)

    response = client.post("/api/search", json={"query": "fireball fire damage"})
    assert response.status_code == 200
    hits = response.json()
    assert len(hits) >= 1
    best = hits[0]
    assert best["page_number"] == 1
    assert best["bm25_score"] > 0
    assert "text_snippet" in best
def test_search_empty_index_returns_empty(client):
    """Searching with no chunks indexed returns 200 and an empty list."""
    response = client.post("/api/search", json={"query": "anything"})
    assert response.status_code == 200
    assert response.json() == []
def test_search_filters_by_doc_ids(client, test_db, monkeypatch):
    """``doc_ids`` restricts hits to the listed documents."""
    import app.api.search as _search_mod

    monkeypatch.setattr(_search_mod, "_get_db_path", lambda: test_db)

    # Three chunks in total. "Grapple" occurs in both books, so only the
    # doc_ids filter can keep the book-b chunk out of the response.
    book_a_pages = [
        {"page_number": 1, "text": "Grapple rules for melee attacks."},
        {"page_number": 2, "text": "Shield spell protects from incoming blows."},
    ]
    book_b_pages = [{"page_number": 3, "text": "Grapple also applies to ranged attacks."}]
    _add_chunks(test_db, "book-a", book_a_pages)
    _add_chunks(test_db, "book-b", book_b_pages)

    response = client.post("/api/search", json={"query": "grapple", "doc_ids": ["book-a"]})
    assert response.status_code == 200
    hits = response.json()
    assert len(hits) >= 1, "expected at least one grapple result from book-a"
    assert all(hit["doc_id"] == "book-a" for hit in hits)
def test_search_has_no_tier_gate(client, monkeypatch):
    """Search must answer 200 (not 402) when PAGEPIPER_OLLAMA_URL is not set."""
    # Unset the variable explicitly so the test checks what it claims even
    # when the surrounding environment happens to define it.
    monkeypatch.delenv("PAGEPIPER_OLLAMA_URL", raising=False)
    resp = client.post("/api/search", json={"query": "anything"})
    assert resp.status_code == 200  # Not 402

View file

@ -1,53 +0,0 @@
# tests/test_synthesizer.py
"""Tests for Synthesizer — mocked LLM, citation assembly."""
from __future__ import annotations
from unittest.mock import MagicMock
from app.services.retriever import RetrievedChunk
from app.services.synthesizer import Synthesizer, SynthesisResult
def _chunk(doc_id: str = "book-a", page: int = 5, text: str = "Fireball rules") -> RetrievedChunk:
    """Build a RetrievedChunk with a fixed id and BM25 score and no vector score."""
    fields = {
        "chunk_id": "c1",
        "doc_id": doc_id,
        "page_number": page,
        "text": text,
        "bm25_score": 1.0,
        "vector_score": None,
    }
    return RetrievedChunk(**fields)
def test_synthesizer_returns_answer_and_citations():
    """One input chunk yields a SynthesisResult with the answer and one citation."""
    fake_llm = MagicMock()
    fake_llm.complete.return_value = "Fireball deals 8d6 damage [p.5]."

    outcome = Synthesizer(fake_llm).synthesize(
        message="How does Fireball work?",
        history=[],
        chunks=[_chunk()],
    )

    assert isinstance(outcome, SynthesisResult)
    assert "Fireball" in outcome.answer
    assert len(outcome.citations) == 1
    citation = outcome.citations[0]
    assert citation.page_number == 5
    assert citation.doc_id == "book-a"
def test_synthesizer_builds_context_from_chunks():
    """The chunk text appears in the first positional argument passed to the LLM."""
    fake_llm = MagicMock()
    fake_llm.complete.return_value = "Answer."
    chunk = _chunk(text="Detailed rule text here.")

    Synthesizer(fake_llm).synthesize("Q?", [], [chunk])

    prompt = fake_llm.complete.call_args.args[0]
    assert "Detailed rule text here." in prompt
def test_synthesizer_uses_system_prompt():
    """The LLM is called with a non-empty ``system`` keyword argument."""
    mock_llm = MagicMock()
    mock_llm.complete.return_value = "Answer."
    synth = Synthesizer(mock_llm)
    synth.synthesize("Q?", [], [_chunk()])
    # call_args.kwargs and call_args[1] are the same mapping, so a single
    # lookup is enough; the former `or` fallback could never differ.
    last_call = mock_llm.complete.call_args
    assert last_call.kwargs.get("system")

39
web/.gitignore vendored
View file

@ -1,39 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
.DS_Store
dist
dist-ssr
coverage
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
*.tsbuildinfo
.eslintcache
# Cypress
/cypress/videos/
/cypress/screenshots/
# Vitest
__screenshots__/
# Vite
*.timestamp-*-*.mjs

View file

@ -1,3 +0,0 @@
{
"recommendations": ["Vue.volar"]
}

View file

@ -1,42 +0,0 @@
# web
This template should help get you started developing with Vue 3 in Vite.
## Recommended IDE Setup
[VS Code](https://code.visualstudio.com/) + [Vue (Official)](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
## Recommended Browser Setup
- Chromium-based browsers (Chrome, Edge, Brave, etc.):
- [Vue.js devtools](https://chromewebstore.google.com/detail/vuejs-devtools/nhdogjmejiglipccpnnnanhbledajbpd)
- [Turn on Custom Object Formatter in Chrome DevTools](http://bit.ly/object-formatters)
- Firefox:
- [Vue.js devtools](https://addons.mozilla.org/en-US/firefox/addon/vue-js-devtools/)
- [Turn on Custom Object Formatter in Firefox DevTools](https://fxdx.dev/firefox-devtools-custom-object-formatters/)
## Type Support for `.vue` Imports in TS
TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types.
## Customize configuration
See [Vite Configuration Reference](https://vite.dev/config/).
## Project Setup
```sh
npm install
```
### Compile and Hot-Reload for Development
```sh
npm run dev
```
### Type-Check, Compile and Minify for Production
```sh
npm run build
```

1
web/env.d.ts vendored
View file

@ -1 +0,0 @@
/// <reference types="vite/client" />

View file

@ -1,13 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Pagepiper</title>
</head>
<body>
<div id="app"></div>
<script type="module" src="/src/main.ts"></script>
</body>
</html>

2898
web/package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -1,31 +0,0 @@
{
"name": "web",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "run-p type-check \"build-only {@}\" --",
"preview": "vite preview",
"build-only": "vite build",
"type-check": "vue-tsc --build"
},
"dependencies": {
"vue": "^3.5.32",
"vue-router": "^5.0.4"
},
"devDependencies": {
"@tsconfig/node24": "^24.0.4",
"@types/node": "^24.12.2",
"@vitejs/plugin-vue": "^6.0.6",
"@vue/tsconfig": "^0.9.1",
"npm-run-all2": "^8.0.4",
"typescript": "~6.0.0",
"vite": "^8.0.8",
"vite-plugin-vue-devtools": "^8.1.1",
"vue-tsc": "^3.2.6"
},
"engines": {
"node": "^20.19.0 || >=22.12.0"
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

View file

@ -1,33 +0,0 @@
<template>
<div id="app">
<nav class="nav">
<span class="nav-brand">Pagepiper</span>
<RouterLink to="/" class="nav-link">Library</RouterLink>
<RouterLink to="/chat" class="nav-link">Chat</RouterLink>
</nav>
<RouterView />
</div>
</template>
<script setup lang="ts">
import { RouterLink, RouterView } from "vue-router"
</script>
<style>
@import "@/theme.css";
.nav {
display: flex;
align-items: center;
gap: 1.5rem;
padding: 0.75rem 1.5rem;
background: var(--color-surface);
border-bottom: 1px solid var(--color-border);
position: sticky;
top: 0;
z-index: 100;
}
.nav-brand { font-weight: 700; color: var(--color-accent); }
.nav-link { color: var(--color-text-muted); text-decoration: none; }
.nav-link:hover, .nav-link.router-link-active { color: var(--color-text); }
</style>

View file

@ -1,101 +0,0 @@
// web/src/api.ts
const BASE = import.meta.env.VITE_API_BASE ?? ""
/** A library entry, as returned by `api.getLibrary()`. */
export interface Document {
  id: string
  title: string
  file_path: string
  status: "pending" | "processing" | "ready" | "error"
  task_id: string | null
  page_count: number | null
  created_at: string
}
/** One hit returned by `api.search()`. */
export interface SearchResult {
  chunk_id: string
  doc_id: string
  page_number: number
  text_snippet: string
  bm25_score: number
}
/** A source excerpt attached to a chat answer (see `ChatResponse.citations`). */
export interface Citation {
  doc_id: string
  page_number: number
  snippet: string
  bm25_score: number | null
}
/** Body of a successful `api.chat()` call. */
export interface ChatResponse {
  answer: string
  citations: Citation[]
}
/** Ingest task state returned by `api.getTaskStatus()`. */
export interface TaskStatus {
  status: string
  progress?: number
  error?: string
}
/** One earlier conversation turn, sent as `history` to `api.chat()`. */
export interface ChatMessage {
  role: string
  content: string
}
/**
 * Thin fetch wrappers around the backend HTTP API.
 *
 * Every method rejects when the response is not OK. Identifiers placed in a
 * URL path are passed through `encodeURIComponent` so that an id containing
 * `/`, `?` or `#` cannot change which route is requested.
 */
export const api = {
  /** List all documents in the library. */
  async getLibrary(): Promise<Document[]> {
    const r = await fetch(`${BASE}/api/library`)
    if (!r.ok) throw new Error(await r.text())
    return r.json()
  },
  /** Trigger a library scan via POST /api/library/scan. */
  async scanLibrary(): Promise<{ discovered: number; queued: number; tasks: { doc_id: string; task_id: string }[] }> {
    const r = await fetch(`${BASE}/api/library/scan`, { method: "POST" })
    if (!r.ok) throw new Error(await r.text())
    return r.json()
  },
  /** Request a re-ingest of one document; resolves with the new task id. */
  async reingestDocument(docId: string): Promise<{ task_id: string }> {
    const r = await fetch(`${BASE}/api/library/${encodeURIComponent(docId)}/reingest`, { method: "POST" })
    if (!r.ok) throw new Error(await r.text())
    return r.json()
  },
  /** Delete one document from the library. */
  async deleteDocument(docId: string): Promise<void> {
    const r = await fetch(`${BASE}/api/library/${encodeURIComponent(docId)}`, { method: "DELETE" })
    if (!r.ok) throw new Error(await r.text())
  },
  /** Fetch the current status of an ingest task. */
  async getTaskStatus(taskId: string): Promise<TaskStatus> {
    const r = await fetch(`${BASE}/api/ingest/${encodeURIComponent(taskId)}`)
    if (!r.ok) throw new Error(await r.text())
    return r.json()
  },
  /** Keyword search; `docIds` omitted means `doc_ids: null` is sent. */
  async search(query: string, topK = 10, docIds?: string[]): Promise<SearchResult[]> {
    const r = await fetch(`${BASE}/api/search`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ query, top_k: topK, doc_ids: docIds ?? null }),
    })
    if (!r.ok) throw new Error(await r.text())
    return r.json()
  },
  /**
   * Send a chat message with prior history.
   *
   * On a non-OK response the thrown Error carries `status` (HTTP status) and
   * `detail` (the parsed `detail` field of the JSON body, if any) so callers
   * can distinguish e.g. a 402 from other failures.
   */
  async chat(
    message: string,
    history: ChatMessage[],
    docIds?: string[],
    topK = 5,
  ): Promise<ChatResponse> {
    const r = await fetch(`${BASE}/api/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message, history, doc_ids: docIds ?? null, top_k: topK }),
    })
    if (!r.ok) {
      // The error body may not be JSON; fall back to an empty object.
      const body = await r.json().catch(() => ({}))
      const err: Error & { status?: number; detail?: unknown } = new Error(
        (body as { detail?: { message?: string } }).detail?.message ?? "Request failed"
      )
      err.status = r.status
      err.detail = (body as { detail?: unknown }).detail
      throw err
    }
    return r.json()
  },
}

View file

@ -1,69 +0,0 @@
<template>
<div class="citation-panel">
<button
class="citation-toggle"
:aria-expanded="open"
@click="open = !open"
>
<span class="citation-badge" :class="{ 'nat20': showNat20 }" aria-hidden="true">
{{ showNat20 ? "⚀ Natural 20" : `p.${citation.page_number}` }}
</span>
<span class="citation-doc">{{ docTitle }}</span>
<span class="citation-chevron">{{ open ? "▲" : "▼" }}</span>
</button>
<div class="citation-body" v-show="open" role="region" :aria-label="`Excerpt from page ${citation.page_number}`">
<p class="citation-source-label">Source text (not paraphrased):</p>
<blockquote class="citation-text">{{ citation.snippet }}</blockquote>
</div>
</div>
</template>
<script setup lang="ts">
import { onMounted, onUnmounted, ref } from "vue"
import type { Citation } from "@/api"

const props = defineProps<{
  citation: Citation
  docTitle?: string
  bm25Score?: number
}>()

// Whether the excerpt body is expanded.
const open = ref(false)
// True while the badge briefly shows the "Natural 20" label instead of the page.
const showNat20 = ref(false)
// Minimum BM25 score that triggers the flash.
const NAT20_THRESHOLD = 8.0
// How long the flash stays visible, in milliseconds.
const NAT20_FLASH_MS = 300

let flashTimer: ReturnType<typeof setTimeout> | null = null

onMounted(() => {
  // Skip the flash entirely for users who asked for reduced motion.
  const prefersReduced = window.matchMedia("(prefers-reduced-motion: reduce)").matches
  if (!prefersReduced && props.bm25Score && props.bm25Score >= NAT20_THRESHOLD) {
    showNat20.value = true
    flashTimer = setTimeout(() => {
      showNat20.value = false
      flashTimer = null
    }, NAT20_FLASH_MS)
  }
})

// Cancel a pending flash reset if the panel is removed before it fires.
onUnmounted(() => {
  if (flashTimer !== null) {
    clearTimeout(flashTimer)
    flashTimer = null
  }
})
</script>
<style scoped>
.citation-panel { border: 1px solid var(--color-border); border-radius: var(--radius-sm); margin-bottom: 0.5rem; overflow: hidden; }
.citation-toggle {
width: 100%; display: flex; align-items: center; gap: 0.75rem; padding: 0.6rem 0.75rem;
background: var(--color-surface-alt); border: none; cursor: pointer; color: var(--color-text);
text-align: left;
}
.citation-toggle:hover { background: var(--color-border); }
.citation-badge {
font-size: 0.75rem; font-weight: 700; padding: 2px 8px;
background: var(--color-surface); border-radius: var(--radius-sm);
border: 1px solid var(--color-border); font-family: var(--font-mono);
white-space: nowrap; transition: background var(--transition-fast), color var(--transition-fast);
}
.citation-badge.nat20 { background: var(--color-accent); color: #fff; border-color: var(--color-accent); }
.citation-doc { flex: 1; font-size: 0.85rem; color: var(--color-text-muted); }
.citation-chevron { font-size: 0.7rem; color: var(--color-text-muted); }
.citation-body { padding: 0.75rem; background: var(--color-surface); }
.citation-source-label { font-size: 0.75rem; color: var(--color-text-muted); margin-bottom: 0.4rem; font-style: italic; }
.citation-text {
border-left: 3px solid var(--color-accent); padding-left: 0.75rem;
font-size: 0.9rem; color: var(--color-text); line-height: 1.6;
}
</style>

View file

@ -1,68 +0,0 @@
<template>
<div class="doc-card" :class="`status-${doc.status}`">
<div class="doc-status-badge">{{ doc.status }}</div>
<div class="doc-title">{{ doc.title }}</div>
<div class="doc-meta" v-if="doc.page_count != null">{{ doc.page_count }} pages</div>
<div class="doc-meta path">{{ shortPath }}</div>
<IngestProgress
v-if="doc.status === 'processing' && doc.task_id"
:task-id="doc.task_id"
@done="emit('refresh')"
/>
<div class="doc-actions">
<button class="btn-sm" @click="emit('reingest', doc.id)" :disabled="doc.status === 'processing'">
Re-index
</button>
<button class="btn-sm danger" @click="emit('delete', doc.id)">Remove</button>
</div>
</div>
</template>
<script setup lang="ts">
import { computed } from "vue"
import type { Document } from "@/api"
import IngestProgress from "@/components/IngestProgress.vue"
const props = defineProps<{ doc: Document }>()
const emit = defineEmits<{ reingest: [id: string]; delete: [id: string]; refresh: [] }>()
// Last two "/"-separated segments of the document's file path.
const shortPath = computed(() =>
  props.doc.file_path.split("/").slice(-2).join("/")
)
</script>
<style scoped>
.doc-card {
background: var(--color-surface);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
padding: 1rem;
display: flex;
flex-direction: column;
gap: 0.4rem;
box-shadow: var(--shadow-card);
position: relative;
}
.doc-card.status-error { border-color: var(--color-error); }
.doc-card.status-ready { border-color: var(--color-success); }
.doc-title { font-weight: 600; font-size: 1rem; }
.doc-meta { font-size: 0.8rem; color: var(--color-text-muted); }
.doc-meta.path { font-family: var(--font-mono); word-break: break-all; }
.doc-status-badge {
position: absolute; top: 0.5rem; right: 0.75rem;
font-size: 0.7rem; font-weight: 700; text-transform: uppercase;
padding: 2px 6px; border-radius: var(--radius-sm);
background: var(--color-surface-alt);
}
.doc-actions { display: flex; gap: 0.5rem; margin-top: 0.5rem; }
.btn-sm {
padding: 4px 10px; border: 1px solid var(--color-border); border-radius: var(--radius-sm);
background: var(--color-surface-alt); color: var(--color-text); cursor: pointer; font-size: 0.8rem;
}
.btn-sm:hover { border-color: var(--color-accent); }
.btn-sm.danger:hover { border-color: var(--color-error); color: var(--color-error); }
.btn-sm:disabled { opacity: 0.4; cursor: default; }
</style>

View file

@ -1,82 +0,0 @@
<template>
<div class="ingest-progress" v-if="visible">
<div class="progress-label">
<span>{{ statusLabel }}</span>
<span class="progress-pct" v-if="status?.progress != null">{{ status.progress }}%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" :style="{ width: barWidth }" />
</div>
<p class="progress-error" v-if="status?.status === 'error'">{{ status.error }}</p>
</div>
</template>
<script setup lang="ts">
import { computed, onMounted, onUnmounted, ref, watch } from "vue"
import { api, type TaskStatus } from "@/api"
const props = defineProps<{ taskId: string | null }>()
const emit = defineEmits<{ done: [] }>()
const status = ref<TaskStatus | null>(null)
let timer: ReturnType<typeof setInterval> | null = null
const visible = computed(() => props.taskId !== null && status.value?.status !== "complete")
// Human-readable label for the current task status.
const statusLabel = computed(() => {
  const current = status.value
  if (!current) return "Queued…"
  const labels: Record<string, string> = {
    running: "Indexing…",
    complete: "Done",
    error: "Error",
  }
  // Any status without an explicit label is shown as generic processing.
  return labels[current.status] ?? "Processing…"
})
// CSS width of the progress fill; missing progress counts as 0, capped at 100%.
const barWidth = computed(() => `${Math.min(status.value?.progress ?? 0, 100)}%`)
/**
 * Fetch the task status once and update local state.
 *
 * Stops polling when the task reports "complete" (also emitting `done`) or
 * "error". Fetch failures are ignored so polling simply tries again later.
 */
async function poll() {
  // Remember which task this request is for; the prop may change while we await.
  const requestedId = props.taskId
  if (!requestedId) return
  try {
    const latest = await api.getTaskStatus(requestedId)
    // A response for a task we are no longer tracking must not touch state;
    // otherwise a finished old task could stop polling for the new one.
    if (requestedId !== props.taskId) return
    status.value = latest
    if (latest.status === "complete") {
      stopPoll()
      emit("done")
    } else if (latest.status === "error") {
      stopPoll()
    }
  } catch (_e: unknown) { /* task not yet registered */ }
}
// Cancel the polling interval, if one is running.
function stopPoll() {
  if (!timer) return
  clearInterval(timer)
  timer = null
}
// Restart polling from a clean state: poll immediately, then every 2 seconds.
function startPoll() {
  stopPoll()
  status.value = null
  if (!props.taskId) return
  poll()
  timer = setInterval(poll, 2000)
}
watch(() => props.taskId, (newId) => {
if (newId) startPoll()
else stopPoll()
})
onMounted(startPoll)
onUnmounted(stopPoll)
</script>
<style scoped>
.ingest-progress { margin-top: 0.5rem; }
.progress-label { display: flex; justify-content: space-between; font-size: 0.8rem; color: var(--color-text-muted); margin-bottom: 4px; }
.progress-bar { height: 4px; background: var(--color-border); border-radius: 2px; overflow: hidden; }
.progress-fill { height: 100%; background: var(--color-accent); transition: width 0.3s ease; }
.progress-error { color: var(--color-error); font-size: 0.8rem; margin-top: 4px; }
</style>

View file

@ -1,5 +0,0 @@
import { createApp } from "vue"
import App from "./App.vue"
import router from "./router"
// Create the root app, install the router, and mount onto #app.
const pagepiper = createApp(App)
pagepiper.use(router)
pagepiper.mount("#app")

View file

@ -1,11 +0,0 @@
import { createRouter, createWebHistory } from "vue-router"
import LibraryView from "@/views/LibraryView.vue"
import ChatView from "@/views/ChatView.vue"
// Route table: the library is the landing page, chat lives under /chat.
const routes = [
  { path: "/", name: "library", component: LibraryView },
  { path: "/chat", name: "chat", component: ChatView },
]

export default createRouter({
  history: createWebHistory(import.meta.env.VITE_BASE_URL),
  routes,
})

View file

@ -1,47 +0,0 @@
/* web/src/theme.css */
:root {
--color-bg: #1a1a2e;
--color-surface: #16213e;
--color-surface-alt: #0f3460;
--color-accent: #e94560;
--color-accent-dim: #a83050;
--color-text: #e8e8e8;
--color-text-muted: #9e9e9e;
--color-success: #4caf50;
--color-warning: #ff9800;
--color-error: #f44336;
--color-border: #2a2a4a;
--font-base: system-ui, -apple-system, sans-serif;
--font-mono: "Fira Code", "Cascadia Code", monospace;
--radius-sm: 4px;
--radius-md: 8px;
--radius-lg: 16px;
--shadow-card: 0 2px 8px rgba(0,0,0,0.4);
--transition-fast: 150ms ease;
}
@media (prefers-color-scheme: light) {
:root {
--color-bg: #f5f5f5;
--color-surface: #ffffff;
--color-surface-alt: #e8eaf6;
--color-accent: #c62828;
--color-accent-dim: #e57373;
--color-text: #212121;
--color-text-muted: #757575;
--color-border: #e0e0e0;
}
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
background: var(--color-bg);
color: var(--color-text);
font-family: var(--font-base);
font-size: 1rem;
line-height: 1.6;
min-height: 100vh;
}

View file

@ -1,240 +0,0 @@
<template>
<div class="chat-layout">
<!-- Message pane -->
<div class="chat-pane">
<div class="chat-messages" ref="messagesEl">
<p class="empty-chat" v-if="history.length === 0">
Ask a question across your indexed rulebooks.
No rulebooks indexed? Go to <RouterLink to="/">Library</RouterLink> first.
</p>
<div
v-for="(msg, i) in history"
:key="i"
class="message"
:class="msg.role"
>
<div class="message-body">{{ msg.content }}</div>
<div class="message-citations" v-if="msg.citations?.length">
<p class="citations-label">Sources:</p>
<CitationPanel
v-for="(cite, j) in msg.citations"
:key="j"
:citation="cite"
:doc-title="docTitles[cite.doc_id] ?? cite.doc_id"
:bm25-score="cite.bm25_score ?? undefined"
/>
</div>
</div>
<div class="message assistant loading" v-if="thinking">
<div class="loading-dots"><span /><span /><span /></div>
</div>
</div>
<p class="error-banner" v-if="errorMsg" role="alert">
{{ errorMsg }}
<span v-if="error402"> — <RouterLink to="/">Library</RouterLink> or set PAGEPIPER_OLLAMA_URL.</span>
</p>
<form class="chat-input-row" @submit.prevent="send">
<input
ref="inputEl"
v-model="draft"
class="chat-input"
placeholder="Ask about your rulebooks…"
:disabled="thinking"
aria-label="Chat message"
autofocus
/>
<button class="btn-send" type="submit" :disabled="thinking || !draft.trim()">Send</button>
</form>
</div>
<!-- Book filter sidebar -->
<aside class="sidebar" role="complementary" aria-label="Filter by book">
<h2 class="sidebar-title">Books</h2>
<p class="sidebar-hint">Select books to search (all = none selected)</p>
<label
v-for="doc in readyDocs"
:key="doc.id"
class="book-filter"
>
<input type="checkbox" :value="doc.id" v-model="selectedDocs" />
{{ doc.title }}
</label>
</aside>
</div>
</template>
<script setup lang="ts">
import { computed, nextTick, onMounted, ref } from "vue"
import { RouterLink } from "vue-router"
import { api, type Citation, type Document } from "@/api"
import CitationPanel from "@/components/CitationPanel.vue"
interface ChatMessage {
role: "user" | "assistant"
content: string
citations?: Citation[]
}
// --- Conversation ---------------------------------------------------------
/** Turns shown in the transcript, in the order they were appended. */
const history = ref<ChatMessage[]>([])
/** Text currently bound to the chat input. */
const draft = ref("")
/** True while a chat request is in flight; drives the loading dots and disables the form. */
const thinking = ref(false)

// --- Error banner ---------------------------------------------------------
/** Message for the error banner; an empty string hides the banner. */
const errorMsg = ref("")
/** True when the last failure carried HTTP status 402. */
const error402 = ref(false)

// --- Element refs ---------------------------------------------------------
/** Scroll container that scrollBottom() pins to its end. */
const messagesEl = ref<HTMLElement | null>(null)
/** The chat text input, focused on mount and after every send. */
const inputEl = ref<HTMLInputElement | null>(null)

// --- Book filter ----------------------------------------------------------
/** Every document returned by the library endpoint. */
const allDocs = ref<Document[]>([])
/** Ids ticked in the sidebar; an empty list is sent as "no filter". */
const selectedDocs = ref<string[]>([])
/** Only documents whose status is "ready" are offered in the sidebar. */
const readyDocs = computed(() => {
  return allDocs.value.filter((doc) => doc.status === "ready")
})
/** Lookup from document id to title, used to label citations. */
const docTitles = computed(() => {
  return Object.fromEntries(allDocs.value.map((doc) => [doc.id, doc.title]))
})
// On mount: populate the book filter, then put the cursor in the chat input.
onMounted(async () => {
  // Best effort: if the library request rejects, the filter list is left empty.
  const pendingDocs = api.getLibrary().catch((): Document[] => [])
  allDocs.value = await pendingDocs
  inputEl.value?.focus()
})
/**
 * Submit the current draft as a user turn and append the assistant's reply.
 *
 * Does nothing when the trimmed draft is empty or a request is already in
 * flight. On failure the error banner is populated (with `error402` set for
 * HTTP 402 responses). The loading flag, scroll position and input focus are
 * restored whether the request succeeded or not.
 */
async function send() {
  const msg = draft.value.trim()
  if (!msg || thinking.value) return

  // Reset the input and any previous error before showing the new user turn.
  draft.value = ""
  errorMsg.value = ""
  error402.value = false
  history.value.push({ role: "user", content: msg })
  thinking.value = true
  // Wait for the DOM to render the new turn so the scroll lands below it.
  await nextTick()
  scrollBottom()

  try {
    // No ticked books means no filter: pass undefined rather than an empty list.
    const docIds = selectedDocs.value.length ? selectedDocs.value : undefined
    // Prior turns only: the turn just pushed travels separately as `msg`,
    // and citations are stripped from what is sent.
    const apiHistory = history.value.slice(0, -1).map(m => ({ role: m.role, content: m.content }))
    const result = await api.chat(msg, apiHistory, docIds)
    history.value.push({ role: "assistant", content: result.answer, citations: result.citations })
  } catch (err: unknown) {
    // A thrown value can be anything, including null, so every access is optional.
    const e = err as (Error & { status?: number; detail?: { message?: string } }) | null | undefined
    // `||` rather than `??`: an empty-string message must still fall back to
    // the default text, otherwise the banner (v-if="errorMsg") stays hidden
    // and the failure is invisible to the user.
    if (e?.status === 402) {
      error402.value = true
      errorMsg.value = e.detail?.message || "Ollama not configured. Set PAGEPIPER_OLLAMA_URL."
    } else {
      errorMsg.value = e?.message || "Something went wrong."
    }
  } finally {
    thinking.value = false
    await nextTick()
    scrollBottom()
    // The input is disabled while thinking, which drops focus; give it back.
    inputEl.value?.focus()
  }
}
/** Scroll the message list to its end; a no-op while the element ref is unset. */
function scrollBottom() {
  const pane = messagesEl.value
  if (!pane) return
  pane.scrollTop = pane.scrollHeight
}
</script>
<style scoped>
/* ---- Page layout: chat pane and book sidebar side by side ---- */
/* NOTE(review): the 56px offset presumably matches the app header height; confirm. */
.chat-layout {
display: flex;
height: calc(100vh - 56px);
overflow: hidden;
}
.chat-pane {
flex: 1;
display: flex;
flex-direction: column;
overflow: hidden;
}
/* Only the message list scrolls; the pane and layout clip their overflow. */
.chat-messages {
flex: 1;
overflow-y: auto;
padding: 1.5rem;
display: flex;
flex-direction: column;
gap: 1rem;
}
.empty-chat { color: var(--color-text-muted); line-height: 1.8; }
/* ---- Message bubbles: user turns align right, assistant turns left ---- */
.message { max-width: 80%; }
.message.user { align-self: flex-end; }
.message.assistant { align-self: flex-start; }
/* pre-wrap keeps line breaks and runs of whitespace in the message text. */
.message-body {
background: var(--color-surface);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
padding: 0.75rem 1rem;
line-height: 1.6;
white-space: pre-wrap;
}
.message.user .message-body {
background: var(--color-surface-alt);
border-color: transparent;
}
.message-citations { margin-top: 0.75rem; }
.citations-label { font-size: 0.75rem; color: var(--color-text-muted); margin-bottom: 0.4rem; font-style: italic; }
/* ---- Loading indicator: three dots bouncing with staggered delays ---- */
.loading-dots { display: flex; gap: 6px; padding: 0.75rem 1rem; }
.loading-dots span {
width: 8px; height: 8px; border-radius: 50%;
background: var(--color-text-muted);
animation: bounce 1.2s ease-in-out infinite;
}
.loading-dots span:nth-child(2) { animation-delay: 0.2s; }
.loading-dots span:nth-child(3) { animation-delay: 0.4s; }
@keyframes bounce { 0%, 80%, 100% { transform: scale(0.6); } 40% { transform: scale(1); } }
/* Users who prefer reduced motion get static, dimmed dots instead of the animation. */
@media (prefers-reduced-motion: reduce) { .loading-dots span { animation: none; opacity: 0.5; } }
/* ---- Error banner: error colour tinted into the surface colour ---- */
.error-banner {
padding: 0.75rem 1.5rem;
background: color-mix(in srgb, var(--color-error) 15%, var(--color-surface));
color: var(--color-error);
font-size: 0.9rem;
}
/* ---- Input row at the bottom of the chat pane ---- */
.chat-input-row {
display: flex;
gap: 0.5rem;
padding: 1rem 1.5rem;
border-top: 1px solid var(--color-border);
background: var(--color-surface);
}
.chat-input {
flex: 1; padding: 0.6rem 1rem;
background: var(--color-bg); border: 1px solid var(--color-border);
border-radius: var(--radius-sm); color: var(--color-text); font-size: 1rem;
}
.chat-input:focus { outline: 2px solid var(--color-accent); border-color: transparent; }
.btn-send {
padding: 0.6rem 1.25rem; background: var(--color-accent); color: #fff;
border: none; border-radius: var(--radius-sm); cursor: pointer; font-size: 0.95rem;
}
.btn-send:disabled { opacity: 0.4; cursor: default; }
/* ---- Book filter sidebar ---- */
.sidebar {
width: 240px; border-left: 1px solid var(--color-border);
background: var(--color-surface); overflow-y: auto; padding: 1rem;
}
.sidebar-title { font-size: 1rem; font-weight: 600; margin-bottom: 0.5rem; }
.sidebar-hint { font-size: 0.75rem; color: var(--color-text-muted); margin-bottom: 0.75rem; line-height: 1.4; }
.book-filter {
display: flex; align-items: flex-start; gap: 0.5rem;
font-size: 0.85rem; margin-bottom: 0.5rem; cursor: pointer; line-height: 1.4;
}
/* ---- Narrow screens: stack the panes vertically in reverse DOM order,
   cap the sidebar at 30% of the viewport height, and widen the bubbles ---- */
@media (max-width: 640px) {
.chat-layout { flex-direction: column-reverse; }
.sidebar { width: 100%; height: auto; max-height: 30vh; border-left: none; border-top: 1px solid var(--color-border); }
.message { max-width: 95%; }
}
</style>

View file

@ -1,108 +0,0 @@
<template>
<main class="library">
<!-- Page title and manual scan trigger; the button is disabled and relabelled while a scan runs. -->
<header class="library-header">
<h1>Library</h1>
<button class="btn-primary" @click="scan" :disabled="scanning">
{{ scanning ? "Scanning..." : "Scan for PDFs" }}
</button>
</header>
<!-- Most recent error from load / scan / re-index / remove, if any. -->
<p class="error-msg" v-if="error">{{ error }}</p>
<!-- Empty state once loading has finished with zero documents; otherwise the card grid. -->
<p class="empty-state" v-if="!loading && docs.length === 0">
No books indexed yet. Click "Scan for PDFs" to discover PDFs in your books directory.<br>
Make sure your PDF directory is mounted at <code>/books</code> inside the container.
</p>
<div class="doc-grid" v-else>
<DocumentCard
v-for="doc in docs"
:key="doc.id"
:doc="doc"
@reingest="reingest"
@delete="remove"
@refresh="load"
/>
</div>
<!-- Summary of the last scan result stored in scanResult. -->
<p class="scan-result" v-if="scanResult">
Found {{ scanResult.discovered }} PDFs, queued {{ scanResult.queued }} for indexing.
</p>
</main>
</template>
<script setup lang="ts">
import { onMounted, ref } from "vue"
import { api, type Document } from "@/api"
import DocumentCard from "@/components/DocumentCard.vue"
/** Documents currently listed in the library grid. */
const docs = ref<Document[]>([])
/** Counts returned by the last scan; null until a scan has returned. */
const scanResult = ref<{ discovered: number; queued: number } | null>(null)
/** Text for the error line; null when there is nothing to report. */
const error = ref<string | null>(null)
// Busy flags. `loading` starts out true so the empty-state message stays
// hidden until the first load() has finished.
const loading = ref<boolean>(true)
const scanning = ref<boolean>(false)
/**
 * Fetch the library list into `docs`.
 *
 * Clears any previous error first; a failure is reported through `error`
 * instead of being thrown, and `loading` is always reset afterwards.
 */
async function load() {
  error.value = null
  loading.value = true
  try {
    const library = await api.getLibrary()
    docs.value = library
  } catch (cause) {
    let message = "Failed to load library"
    if (cause instanceof Error) message = cause.message
    error.value = message
  } finally {
    loading.value = false
  }
}
/**
 * Trigger a library scan via the API, store its result summary, then
 * reload the document list.
 *
 * A failed scan is reported through `error` rather than thrown, and
 * `scanning` is always reset afterwards.
 */
async function scan() {
  scanning.value = true
  error.value = null
  // Drop the previous summary up front so a failed scan cannot leave stale
  // "Found N PDFs" counts on screen next to its error message.
  scanResult.value = null
  try {
    scanResult.value = await api.scanLibrary()
    // load() handles its own failures, so only scanLibrary() errors land in the catch below.
    await load()
  } catch (e) {
    error.value = e instanceof Error ? e.message : "Scan failed"
  } finally {
    scanning.value = false
  }
}
/**
 * Ask the API to re-ingest the document with the given id, then refresh the list.
 * A failure is reported through `error` rather than thrown.
 */
async function reingest(id: string) {
  error.value = null
  try {
    await api.reingestDocument(id)
    await load()
  } catch (cause) {
    if (cause instanceof Error) {
      error.value = cause.message
    } else {
      error.value = "Re-index failed"
    }
  }
}
/**
 * Delete the document with the given id through the API after the user
 * confirms, then refresh the list. A failure is reported through `error`
 * rather than thrown.
 */
async function remove(id: string) {
  const confirmed = confirm("Remove this book from the library? The PDF file is not deleted.")
  if (!confirmed) return

  error.value = null
  try {
    await api.deleteDocument(id)
    await load()
  } catch (cause) {
    let message = "Remove failed"
    if (cause instanceof Error) message = cause.message
    error.value = message
  }
}
onMounted(load)
</script>
<style scoped>
/* Centred page container, capped at 1200px wide. */
.library { padding: 1.5rem; max-width: 1200px; margin: 0 auto; }
/* Title and scan button on one row; the row wraps when space runs out. */
.library-header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 1.5rem; flex-wrap: wrap; gap: 1rem; }
h1 { font-size: 1.5rem; }
.btn-primary {
background: var(--color-accent); color: #fff; border: none; padding: 0.6rem 1.2rem;
border-radius: var(--radius-sm); cursor: pointer; font-size: 0.95rem;
}
.btn-primary:disabled { opacity: 0.5; cursor: default; }
/* Responsive card grid: as many columns of at least 280px as fit. */
.doc-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 1rem; }
.empty-state { color: var(--color-text-muted); line-height: 1.8; }
.empty-state code { font-family: var(--font-mono); background: var(--color-surface-alt); padding: 2px 6px; border-radius: 3px; }
/* Status lines: scan summary and error message. */
.scan-result { margin-top: 1rem; color: var(--color-text-muted); font-size: 0.9rem; }
.error-msg { color: var(--color-error); margin-bottom: 1rem; font-size: 0.9rem; }
</style>

View file

@ -1,18 +0,0 @@
{
"extends": "@vue/tsconfig/tsconfig.dom.json",
"include": ["env.d.ts", "src/**/*", "src/**/*.vue"],
"exclude": ["src/**/__tests__/*"],
"compilerOptions": {
// Extra safety for array and object lookups, but may have false positives.
"noUncheckedIndexedAccess": true,
// Path mapping for cleaner imports.
"paths": {
"@/*": ["./src/*"]
},
// `vue-tsc --build` produces a .tsbuildinfo file for incremental type-checking.
// Specified here to keep it out of the root directory.
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo"
}
}

View file

@ -1,11 +0,0 @@
{
"files": [],
"references": [
{
"path": "./tsconfig.node.json"
},
{
"path": "./tsconfig.app.json"
}
]
}

View file

@ -1,27 +0,0 @@
// TSConfig for modules that run in Node.js environment via either transpilation or type-stripping.
{
"extends": "@tsconfig/node24/tsconfig.json",
"include": [
"vite.config.*",
"vitest.config.*",
"cypress.config.*",
"playwright.config.*",
"eslint.config.*"
],
"compilerOptions": {
// Most tools use transpilation instead of Node.js's native type-stripping.
// Bundler mode provides a smoother developer experience.
"module": "preserve",
"moduleResolution": "bundler",
// Include Node.js types and avoid accidentally including other `@types/*` packages.
"types": ["node"],
// Disable emitting output during `vue-tsc --build`, which is used for type-checking only.
"noEmit": true,
// `vue-tsc --build` produces a .tsbuildinfo file for incremental type-checking.
// Specified here to keep it out of the root directory.
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo"
}
}

View file

@ -1,18 +0,0 @@
import { fileURLToPath, URL } from 'node:url'
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'
import vueDevTools from 'vite-plugin-vue-devtools'
// https://vite.dev/config/
// Filesystem path of ./src, resolved relative to this config file.
const srcDir = fileURLToPath(new URL('./src', import.meta.url))

// Vite configuration: Vue SFC support, Vue DevTools, and the `@` -> ./src import alias.
export default defineConfig({
  plugins: [vue(), vueDevTools()],
  resolve: {
    alias: { '@': srcDir },
  },
})