chore: initial pagepiper repo scaffold

Adds pyproject.toml, environment.yml, Dockerfile, docker/web (Vue+nginx),
compose.yml, compose.override.yml.example, manage.sh, .env.example,
.gitignore, and config stubs for the pagepiper self-hosted PDF library tool.
Port 8521. No secrets committed.
This commit is contained in:
pyr0ball 2026-05-04 16:54:08 -07:00
commit 3a0608ff98
12 changed files with 242 additions and 0 deletions

12
.env.example Normal file
View file

@ -0,0 +1,12 @@
# Copy to .env and fill in your values. .env is gitignored.
# compose.yml passes this file to the api container (env_file: .env) and also
# interpolates ${PAGEPIPER_BOOKS_DIR} from it for the /books bind mount.
# Path to your PDF library on the host machine
# (compose.yml mounts it read-only at /books inside the api container)
PAGEPIPER_BOOKS_DIR=/path/to/your/pdfs
# Data directory (SQLite + vector DB stored here)
# NOTE(review): relative value — presumably resolved against the app's working
# directory (/app/pagepiper in the image, where compose.yml mounts ./data); confirm.
PAGEPIPER_DATA_DIR=data
# Ollama URL — set this to unlock semantic search and RAG chat (BYOK)
# Uncomment the lines below to enable; the model names are examples.
# PAGEPIPER_OLLAMA_URL=http://localhost:11434
# PAGEPIPER_CHAT_MODEL=mistral:7b
# PAGEPIPER_EMBED_MODEL=nomic-embed-text

24
.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# --- Local-only files: may contain secrets, never committed ---
.env
config/llm.yaml
CLAUDE.md
# --- Docker: personal compose overrides (copied from the .example file) ---
compose.override.yml
# --- Runtime data and the PDF library itself ---
data/
books/
# --- Python: bytecode, test cache, packaging output ---
__pycache__/
*.py[co]
.pytest_cache/
*.egg-info/
.eggs/
dist/
# --- Node: frontend dependencies and build output ---
web/node_modules/
web/dist/

31
Dockerfile Normal file
View file

@ -0,0 +1,31 @@
# syntax=docker/dockerfile:1
# Pagepiper API image.
#
# Build context is the PARENT directory of the pagepiper repo (compose sets
# `context: ..`) so that the sibling circuitforge-core checkout can be copied in.

# Pinned for reproducible rebuilds; override with --build-arg MINICONDA_TAG=<tag>.
ARG MINICONDA_TAG=24.7.1-0
FROM continuumio/miniconda3:${MINICONDA_TAG}

WORKDIR /app

# System deps for pytesseract (OCR) and pdfplumber
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Create the pagepiper conda env FIRST: it is the slowest layer and depends only
# on environment.yml, so edits to circuitforge-core or the app no longer
# invalidate it. Package caches are removed in the same layer so they never
# reach the image.
COPY pagepiper/environment.yml ./environment.yml
RUN conda env create -f environment.yml \
    && conda clean -afy \
    && rm -rf /root/.cache/pip

# Install circuitforge-core from the sibling directory into the pagepiper env.
# It is installed only there (not into `base`): the server runs from pagepiper.
COPY circuitforge-core/ ./circuitforge-core/
RUN conda run -n pagepiper pip install --no-cache-dir -e "/app/circuitforge-core[pdf,vector]"

# Copy the app source and drop gitignored secrets in the SAME layer.
# A `COPY` followed by `RUN rm` in a later layer leaves the files recoverable
# from the COPY layer; copying from a bind mount and deleting within one RUN
# means .env and config/llm.yaml never land in any image layer.
# (A .dockerignore at the build-context root remains the first line of defence.)
RUN --mount=type=bind,source=pagepiper,target=/tmp/pagepiper-src \
    mkdir -p /app/pagepiper \
    && cp -a /tmp/pagepiper-src/. /app/pagepiper/ \
    && rm -f /app/pagepiper/.env /app/pagepiper/config/llm.yaml

# Install the app itself (editable) into the pagepiper env
WORKDIR /app/pagepiper
RUN conda run -n pagepiper pip install --no-cache-dir -e .

# NOTE(review): the container runs as root. Switching to a non-root USER needs
# the bind-mounted data directory to be writable by that UID — decide together
# with the compose volume setup.

EXPOSE 8521
# --no-capture-output streams uvicorn's stdout/stderr straight to `docker logs`.
CMD ["conda", "run", "--no-capture-output", "-n", "pagepiper", \
     "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8521"]

View file

@ -0,0 +1,13 @@
# Template for local overrides: copy this file to compose.override.yml, then edit.
# The copy is listed in .gitignore, so anything secret you put there stays out of git.
services:
  api:
    environment:
      # Local Ollama endpoint and models; setting these unlocks semantic search and RAG chat
      PAGEPIPER_OLLAMA_URL: "http://localhost:11434"
      PAGEPIPER_CHAT_MODEL: "mistral:7b"
      PAGEPIPER_EMBED_MODEL: "nomic-embed-text"
    volumes:
      # Mount a different host directory of PDFs at /books (read-only)
      - /path/to/your/pdfs:/books:ro

21
compose.yml Normal file
View file

@ -0,0 +1,21 @@
# Pagepiper stack: FastAPI backend (api) + nginx-served Vue SPA (web).
#
# Host port layout:
#   8521 -> web  (nginx, container port 80) — the URL users open
#   8522 -> api  (uvicorn, host networking) — reached by nginx through
#                host.docker.internal
#
# The API must NOT listen on 8521: it uses host networking, so it would collide
# with the port published for web and one of the two would fail to bind.
services:
  api:
    build:
      # Parent directory, so the sibling circuitforge-core checkout is in the context
      context: ..
      dockerfile: pagepiper/Dockerfile
    # Host networking lets the API reach a local Ollama at localhost:11434
    network_mode: host
    # Same command as the image default, but on port 8522 (see layout above)
    command:
      - conda
      - run
      - --no-capture-output
      - -n
      - pagepiper
      - uvicorn
      - app.main:app
      - --host
      - 0.0.0.0
      - --port
      - "8522"
    env_file: .env
    environment:
      # .env carries the HOST path (used for the bind mount below). Inside the
      # container the library always lives at /books, so override the value the
      # app would otherwise see. `environment` takes precedence over `env_file`.
      PAGEPIPER_BOOKS_DIR: /books
    volumes:
      - ./data:/app/pagepiper/data
      - ${PAGEPIPER_BOOKS_DIR:-./books}:/books:ro
    restart: unless-stopped
  web:
    build:
      context: .
      dockerfile: docker/web/Dockerfile
    ports:
      - "8521:80"
    extra_hosts:
      # Makes the Docker host (where the host-networked api listens) resolvable
      # from inside this bridged container; required on Linux engines.
      - "host.docker.internal:host-gateway"
    restart: unless-stopped
    depends_on:
      - api

2
config/ingest.yaml Normal file
View file

@ -0,0 +1,2 @
ocr_min_words: 10 # Pages with fewer words from text layer → OCR fallback
batch_size: 32 # Pages embedded per Ollama call (tune to your GPU VRAM)

7
config/llm.yaml.example Normal file
View file

@ -0,0 +1,7 @
# Copy to config/llm.yaml (gitignored) — or use .env / compose.override.yml instead.
provider: ollama
base_url: "${PAGEPIPER_OLLAMA_URL}"
chat_model: mistral:7b
embedding_model: nomic-embed-text # ollama pull nomic-embed-text
vector_store: sqlite_vec

19
docker/web/Dockerfile Normal file
View file

@ -0,0 +1,19 @
# Stage 1: build Vue SPA
FROM node:20-alpine AS build
WORKDIR /app
# Manifests first, so the dependency layer stays cached until they change
COPY web/package*.json ./
RUN npm ci --prefer-offline
COPY web/ ./
# Build-time settings exposed to the frontend build through the environment.
# NOTE(review): an empty VITE_API_BASE presumably makes the SPA call same-origin
# paths, which nginx proxies under /api/ — confirm in the web/ source.
ARG VITE_BASE_URL=/
ARG VITE_API_BASE=
ENV VITE_BASE_URL=$VITE_BASE_URL
ENV VITE_API_BASE=$VITE_API_BASE
RUN npm run build
# Stage 2: serve via nginx (release line pinned instead of the floating `alpine` tag)
FROM nginx:1.27-alpine
COPY docker/web/nginx.conf /etc/nginx/conf.d/default.conf
COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80

17
docker/web/nginx.conf Normal file
View file

@ -0,0 +1,17 @
server {
    listen 80;
    root /usr/share/nginx/html;
    index index.html;

    # SPA routing: any path that is not an existing file or directory falls
    # back to index.html so the client-side router can handle it.
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Proxy API requests to FastAPI.
    # The api service uses host networking, so from inside this container it is
    # reachable on the Docker host (host.docker.internal, mapped in compose.yml),
    # not on `localhost`, which here is the nginx container itself.
    location /api/ {
        proxy_pass http://host.docker.internal:8522;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
    }
}

19
environment.yml Normal file
View file

@ -0,0 +1,19 @@
# Conda environment for pagepiper. The API Dockerfile creates it with
# `conda env create -f environment.yml`; it also serves local development.
name: pagepiper
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.11
  - pip
  - pip:
      # Web framework / server
      - fastapi>=0.110
      - uvicorn[standard]>=0.29
      # Listed in pyproject.toml's runtime dependencies; kept in sync here so an
      # env built from this file alone matches the installed package
      - python-multipart>=0.0.9
      # Search, config, HTTP client
      - rank-bm25>=0.2
      - PyYAML>=6.0
      - httpx>=0.27
      # PDF text extraction and OCR (pytesseract needs the system tesseract binary)
      - pdfplumber>=0.11
      - pytesseract>=0.3
      - Pillow>=10.0
      # Vector storage
      - sqlite-vec>=0.1
      # Test tooling
      - pytest>=8.0
      - pytest-asyncio>=0.23

50
manage.sh Executable file
View file

@ -0,0 +1,50 @@
#!/usr/bin/env bash
# manage.sh — convenience wrapper around `docker compose` for the pagepiper stack.
#
# Usage: ./manage.sh {start|stop|restart|status|logs [svc]|open|build|test}
#
# Environment:
#   PAGEPIPER_CONDA_ENV  conda env used by `test` (default: cf)
set -euo pipefail

# Work from the directory holding this script so compose.yml, the optional
# compose.override.yml and tests/ resolve no matter where the script is called from.
cd "$(dirname "${BASH_SOURCE[0]}")"

SERVICE=pagepiper
WEB_PORT=8521
COMPOSE_FILE="compose.yml"

# Conda env for `test`. Defaults to `cf` as before; note that this repo's
# environment.yml creates an env named `pagepiper`, so set
# PAGEPIPER_CONDA_ENV=pagepiper if that is the env you built.
TEST_ENV="${PAGEPIPER_CONDA_ENV:-cf}"

# Compose invocation as an array: no reliance on unquoted word splitting, and
# the override file is appended only when it exists.
COMPOSE=(docker compose -f "$COMPOSE_FILE")
if [[ -f "compose.override.yml" ]]; then
    COMPOSE+=(-f "compose.override.yml")
fi

# Print the command summary and exit non-zero.
usage() {
    echo "Usage: $0 {start|stop|restart|status|logs [svc]|open|build|test}"
    exit 1
}

# Build (if needed) and start the stack in the background, then print the URL.
bring_up() {
    "${COMPOSE[@]}" up -d --build
    echo "Pagepiper running → http://localhost:${WEB_PORT}"
}

cmd="${1:-help}"
shift || true   # `shift` fails when there are no arguments; that is fine

case "$cmd" in
    start)
        bring_up
        ;;
    stop)
        "${COMPOSE[@]}" down
        ;;
    restart)
        "${COMPOSE[@]}" down
        bring_up
        ;;
    status)
        "${COMPOSE[@]}" ps
        ;;
    logs)
        # Forward the service name(s) only if given. Passing an empty-string
        # argument makes `docker compose logs` look for a service named "".
        "${COMPOSE[@]}" logs -f "$@"
        ;;
    open)
        # Linux first (xdg-open), then macOS (open)
        xdg-open "http://localhost:${WEB_PORT}" 2>/dev/null || open "http://localhost:${WEB_PORT}"
        ;;
    build)
        "${COMPOSE[@]}" build --no-cache
        ;;
    test)
        conda run -n "$TEST_ENV" pytest tests/ -v
        ;;
    *)
        usage
        ;;
esac

27
pyproject.toml Normal file
View file

@ -0,0 +1,27 @@
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pagepiper"
version = "0.1.0"
description = "Self-hosted PDF library manager with RAG chat and page-level citations"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "fastapi>=0.110",
    "uvicorn[standard]>=0.29",
    "python-multipart>=0.0.9",
    "rank-bm25>=0.2",
    "PyYAML>=6.0",
    "httpx>=0.27",
    "circuitforge-core[pdf,vector]>=0.19.0",
]

# Test tooling, installable with `pip install -e ".[dev]"`.
# pytest-asyncio provides the `asyncio_mode` option configured below.
[project.optional-dependencies]
dev = [
    "pytest>=8.0",
    "pytest-asyncio>=0.23",
]

# Only the `app` package tree is shipped.
[tool.setuptools.packages.find]
where = ["."]
include = ["app*"]

[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"