chore: initial pagepiper repo scaffold
Adds pyproject.toml, environment.yml, Dockerfile, docker/web (Vue+nginx), compose.yml, compose.override.yml.example, manage.sh, .env.example, .gitignore, and config stubs for the pagepiper self-hosted PDF library tool. Port 8521. No secrets committed.
This commit is contained in:
commit
3a0608ff98
12 changed files with 242 additions and 0 deletions
12
.env.example
Normal file
12
.env.example
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
# Local settings template: copy this file to `.env` and edit the values.
# `.env` is gitignored, so your copy is never committed.

# Host directory that holds your PDF library.
PAGEPIPER_BOOKS_DIR=/path/to/your/pdfs

# Directory where the SQLite database and the vector DB are stored.
PAGEPIPER_DATA_DIR=data

# Optional: Ollama endpoint. Uncomment to unlock semantic search and RAG chat.
# The URL must be reachable from wherever the API process runs; `localhost`
# only works when that process shares the host's network.
# PAGEPIPER_OLLAMA_URL=http://localhost:11434
# PAGEPIPER_CHAT_MODEL=mistral:7b
# PAGEPIPER_EMBED_MODEL=nomic-embed-text
|
||||||
24
.gitignore
vendored
Normal file
24
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
# --- Local secrets and machine-specific config (never commit) ---
.env
config/llm.yaml
CLAUDE.md

# --- Runtime data and the PDF library ---
data/
books/

# --- Python bytecode, caches and build artefacts ---
__pycache__/
*.py[co]
.pytest_cache/
*.egg-info/
.eggs/
dist/

# --- Front-end dependencies and build output ---
web/node_modules/
web/dist/

# --- Personal Compose overrides (copy of compose.override.yml.example) ---
compose.override.yml
|
||||||
31
Dockerfile
Normal file
31
Dockerfile
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
# syntax=docker/dockerfile:1
#
# API image for pagepiper (FastAPI served by uvicorn on port 8521).
# The build context is the PARENT directory (compose.yml sets `context: ..`),
# so the sibling checkouts `circuitforge-core/` and `pagepiper/` are both visible.

# Pinned instead of :latest so rebuilds are reproducible; bump deliberately.
ARG MINICONDA_IMAGE=continuumio/miniconda3:24.7.1-0

# ---------------------------------------------------------------------------
# Stage 1: stage the application source and strip gitignored secrets.
# Deleting files in a later layer of the final image would leave them readable
# in the earlier COPY layer, so the clean-up happens in this throw-away stage
# and only the cleaned tree is copied into the runtime image.
# ---------------------------------------------------------------------------
FROM ${MINICONDA_IMAGE} AS source
WORKDIR /src
COPY pagepiper/ ./pagepiper/
RUN rm -f pagepiper/.env pagepiper/config/llm.yaml

# ---------------------------------------------------------------------------
# Stage 2: runtime image.
# ---------------------------------------------------------------------------
FROM ${MINICONDA_IMAGE} AS runtime

WORKDIR /app

# System deps for pytesseract (OCR) and pdfplumber
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Create the pagepiper conda env first: environment.yml changes far less often
# than the source trees, so this expensive layer stays cached across code edits.
# The conda package cache is dropped in the same layer to keep the image small.
COPY pagepiper/environment.yml .
RUN conda env create -f environment.yml \
    && conda clean -afy

# Install circuitforge-core (editable) into the pagepiper env. It is no longer
# also installed into the `base` env: nothing in this image runs from `base`.
COPY circuitforge-core/ ./circuitforge-core/
RUN conda run -n pagepiper pip install --no-cache-dir -e "/app/circuitforge-core[pdf,vector]"

# Application source, already stripped of .env and config/llm.yaml in stage 1.
COPY --from=source /src/pagepiper/ ./pagepiper/

# Install the app itself (editable) into the pagepiper env.
WORKDIR /app/pagepiper
RUN conda run -n pagepiper pip install --no-cache-dir -e .

# NOTE(review): the container still runs as root. Switching to a dedicated USER
# requires the bind-mounted data directory to be writable by that user —
# confirm the host-side ownership before changing this.

EXPOSE 8521

# --no-capture-output lets uvicorn's stdout/stderr reach `docker logs`.
CMD ["conda", "run", "--no-capture-output", "-n", "pagepiper", \
     "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8521"]
|
||||||
13
compose.override.yml.example
Normal file
13
compose.override.yml.example
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Template for local overrides: copy to compose.override.yml and adjust.
# compose.override.yml is gitignored — never commit secrets.

services:
  api:
    environment:
      # Ollama endpoint; setting it unlocks semantic search and RAG chat.
      # The URL must be reachable from inside the api container: `localhost`
      # only works when that container shares the host's network.
      - "PAGEPIPER_OLLAMA_URL=http://localhost:11434"
      - "PAGEPIPER_CHAT_MODEL=mistral:7b"
      - "PAGEPIPER_EMBED_MODEL=nomic-embed-text"
    volumes:
      # Mount a different host directory as the read-only PDF library.
      - /path/to/your/pdfs:/books:ro
|
||||||
21
compose.yml
Normal file
21
compose.yml
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
# pagepiper stack: two containers sharing ONE network namespace.
#
#   api – FastAPI/uvicorn, listens on 8521 inside the namespace
#   web – nginx, serves the SPA on 80 and proxies /api/ to port 8521 on loopback
#
# Why shared: previously `api` ran on the host network (uvicorn binding host
# port 8521) while `web` also published host port 8521, so the two collided,
# and nginx's loopback upstream pointed at its own container instead of the
# API. With `web` joined to api's namespace, loopback reaches uvicorn and host
# port 8521 is bound exactly once.
services:
  api:
    build:
      context: ..
      dockerfile: pagepiper/Dockerfile
    env_file: .env
    ports:
      # Ports of a shared namespace must be declared on the service that owns
      # it. Host 8521 -> nginx (port 80). uvicorn's 8521 is deliberately not
      # published; it is reached through nginx's /api/ proxy.
      - "8521:80"
    extra_hosts:
      # The api is no longer on the host network, so services running on the
      # Docker host (e.g. Ollama) are reached as
      # http://host.docker.internal:11434 rather than http://localhost:11434.
      - "host.docker.internal:host-gateway"
    volumes:
      - ./data:/app/pagepiper/data
      - ${PAGEPIPER_BOOKS_DIR:-./books}:/books:ro
    restart: unless-stopped

  web:
    build:
      context: .
      dockerfile: docker/web/Dockerfile
    # Join api's network namespace (see header comment).
    # NOTE(review): if the api container is restarted outside of
    # `docker compose up`, web may need a restart to rejoin it — verify.
    network_mode: "service:api"
    restart: unless-stopped
    depends_on:
      - api
|
||||||
2
config/ingest.yaml
Normal file
2
config/ingest.yaml
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Pages whose text layer yields fewer words than this fall back to OCR.
ocr_min_words: 10

# Number of pages embedded per Ollama call (tune to your GPU VRAM).
batch_size: 32
|
||||||
7
config/llm.yaml.example
Normal file
7
config/llm.yaml.example
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# LLM settings template. Copy to config/llm.yaml (gitignored) — or configure
# the same values through .env / compose.override.yml instead.

provider: "ollama"

# NOTE(review): plain YAML does not expand ${...}; presumably the config loader
# substitutes the environment variable — confirm.
base_url: "${PAGEPIPER_OLLAMA_URL}"

chat_model: "mistral:7b"

# Fetch the model first with: ollama pull nomic-embed-text
embedding_model: "nomic-embed-text"

vector_store: "sqlite_vec"
|
||||||
19
docker/web/Dockerfile
Normal file
19
docker/web/Dockerfile
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
# syntax=docker/dockerfile:1
#
# Web image: builds the Vue SPA with Node, then serves the static bundle with
# nginx. Build context is the repository root (compose.yml sets `context: .`).

# ---------------------------------------------------------------------------
# Stage 1: build Vue SPA
# ---------------------------------------------------------------------------
FROM node:20-alpine AS build
WORKDIR /app

# Manifests first, so the dependency layer is reused until package*.json changes.
COPY web/package*.json ./
RUN npm ci --prefer-offline

COPY web/ ./

# Build-time settings, exported to the environment of `npm run build`
# (presumably read by the Vite build — confirm against web/vite.config).
ARG VITE_BASE_URL=/
ARG VITE_API_BASE=
ENV VITE_BASE_URL=$VITE_BASE_URL \
    VITE_API_BASE=$VITE_API_BASE

RUN npm run build

# ---------------------------------------------------------------------------
# Stage 2: serve via nginx
# Pinned to a release line instead of the floating `alpine` tag so rebuilds
# do not silently jump nginx versions; bump deliberately.
# ---------------------------------------------------------------------------
FROM nginx:1.27-alpine AS runtime
COPY docker/web/nginx.conf /etc/nginx/conf.d/default.conf
COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80
|
||||||
17
docker/web/nginx.conf
Normal file
17
docker/web/nginx.conf
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
# Serves the built SPA and forwards /api/ to the FastAPI backend.
server {
    listen 80;
    root /usr/share/nginx/html;
    index index.html;

    # Proxy API requests to FastAPI
    location /api/ {
        # 127.0.0.1 instead of "localhost": the name can also resolve to ::1,
        # but uvicorn is started with --host 0.0.0.0 (IPv4 only), so upstream
        # attempts over IPv6 would be refused. Loopback only reaches the API
        # when nginx and uvicorn share a network namespace.
        proxy_pass http://127.0.0.1:8521;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        # Standard forwarding headers so the backend can see the client chain
        # and the original scheme.
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # NOTE(review): nginx's default request-body limit is 1 MB; if the API
        # accepts PDF uploads, client_max_body_size needs raising — confirm.
    }

    # SPA routing — all non-asset paths → index.html
    location / {
        try_files $uri $uri/ /index.html;
    }
}
|
||||||
19
environment.yml
Normal file
19
environment.yml
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
# Conda environment for pagepiper; created with `conda env create -f environment.yml`.
name: pagepiper

channels: [conda-forge, defaults]

dependencies:
  - python=3.11
  - pip
  - pip:
      # Web/API stack
      - fastapi>=0.110
      - uvicorn[standard]>=0.29
      - httpx>=0.27
      - PyYAML>=6.0
      # PDF text extraction and OCR
      - pdfplumber>=0.11
      - pytesseract>=0.3
      - Pillow>=10.0
      # Search (BM25 ranking + SQLite vector extension)
      - rank-bm25>=0.2
      - sqlite-vec>=0.1
      # Test tooling
      - pytest>=8.0
      - pytest-asyncio>=0.23
|
||||||
50
manage.sh
Executable file
50
manage.sh
Executable file
|
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env bash
# manage.sh — thin wrapper around `docker compose` for the pagepiper stack.
#
# Usage: ./manage.sh {start|stop|restart|status|logs [svc]|open|build|test}
# Any other (or missing) command prints the usage line and exits 1.
#
# Environment:
#   PAGEPIPER_CONDA_ENV  conda env used by `test` (default: cf)
set -euo pipefail

# Resolve compose.yml, compose.override.yml and tests/ relative to this script,
# so the wrapper behaves the same from any working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

SERVICE=pagepiper   # not referenced below; kept for compatibility
WEB_PORT=8521
COMPOSE_FILE="compose.yml"

# The compose files are passed explicitly with -f, so the local override is
# appended by hand when it exists. An array keeps every flag a separate,
# correctly quoted word instead of relying on unquoted word-splitting.
COMPOSE_ARGS=(-f "$COMPOSE_FILE")
if [[ -f "compose.override.yml" ]]; then
  COMPOSE_ARGS+=(-f "compose.override.yml")
fi

# Run `docker compose` with the project's compose files plus the given arguments.
compose() {
  docker compose "${COMPOSE_ARGS[@]}" "$@"
}

# Print where the web UI is served.
announce() {
  echo "Pagepiper running → http://localhost:${WEB_PORT}"
}

# Print the usage line and exit with status 1.
usage() {
  echo "Usage: $0 {start|stop|restart|status|logs [svc]|open|build|test}"
  exit 1
}

cmd="${1:-help}"
shift || true   # no-op when called without arguments

case "$cmd" in
  start)
    compose up -d --build
    announce
    ;;
  stop)
    compose down
    ;;
  restart)
    compose down
    compose up -d --build
    announce
    ;;
  status)
    compose ps
    ;;
  logs)
    # Forward the service name only when one was given: an empty "" argument
    # would be passed to compose as a (non-existent) service name.
    compose logs -f ${1:+"$1"}
    ;;
  open)
    # xdg-open on Linux, falling back to `open` (macOS).
    xdg-open "http://localhost:${WEB_PORT}" 2>/dev/null || open "http://localhost:${WEB_PORT}"
    ;;
  build)
    compose build --no-cache
    ;;
  test)
    # Runs on the host, not in a container. The env name defaults to `cf` and
    # can be overridden, e.g. PAGEPIPER_CONDA_ENV=pagepiper ./manage.sh test
    conda run -n "${PAGEPIPER_CONDA_ENV:-cf}" pytest tests/ -v
    ;;
  *)
    usage
    ;;
esac
|
||||||
27
pyproject.toml
Normal file
27
pyproject.toml
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Packaging metadata for pagepiper (setuptools backend).

[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=68",
    "wheel",
]

[project]
name = "pagepiper"
version = "0.1.0"
description = "Self-hosted PDF library manager with RAG chat and page-level citations"
# NOTE(review): README.md must exist at the project root for this to resolve —
# confirm it is present in the repository.
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    # Web/API stack
    "fastapi>=0.110",
    "uvicorn[standard]>=0.29",
    "python-multipart>=0.0.9",
    "httpx>=0.27",
    "PyYAML>=6.0",
    # Search
    "rank-bm25>=0.2",
    # Shared CircuitForge library with its pdf and vector extras
    "circuitforge-core[pdf,vector]>=0.19.0",
]

# Package discovery: only the `app` package tree is included.
[tool.setuptools.packages.find]
where = ["."]
include = ["app*"]

[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"
|
||||||
Loading…
Reference in a new issue