chore: initial pagepiper repo scaffold
Adds pyproject.toml, environment.yml, Dockerfile, docker/web (Vue+nginx), compose.yml, compose.override.yml.example, manage.sh, .env.example, .gitignore, and config stubs for the pagepiper self-hosted PDF library tool. Port 8521. No secrets committed.
This commit is contained in:
commit
3a0608ff98
12 changed files with 242 additions and 0 deletions
12
.env.example
Normal file
12
.env.example
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# Copy to .env and fill in your values. .env is gitignored.
|
||||
|
||||
# Path to your PDF library on the host machine
|
||||
PAGEPIPER_BOOKS_DIR=/path/to/your/pdfs
|
||||
|
||||
# Data directory (SQLite + vector DB stored here)
|
||||
PAGEPIPER_DATA_DIR=data
|
||||
|
||||
# Ollama URL — set this to unlock semantic search and RAG chat (BYOK)
|
||||
# PAGEPIPER_OLLAMA_URL=http://localhost:11434
|
||||
# PAGEPIPER_CHAT_MODEL=mistral:7b
|
||||
# PAGEPIPER_EMBED_MODEL=nomic-embed-text
|
||||
24
.gitignore
vendored
Normal file
24
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# Secrets and local config
|
||||
.env
|
||||
config/llm.yaml
|
||||
CLAUDE.md
|
||||
|
||||
# Data
|
||||
data/
|
||||
books/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
.pytest_cache/
|
||||
*.egg-info/
|
||||
dist/
|
||||
.eggs/
|
||||
|
||||
# Node
|
||||
web/node_modules/
|
||||
web/dist/
|
||||
|
||||
# Docker override (local dev extras)
|
||||
compose.override.yml
|
||||
31
Dockerfile
Normal file
31
Dockerfile
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Base image tag is pinned so rebuilds are reproducible; a floating :latest
# tag can change underneath the build. Override deliberately with
#   --build-arg MINICONDA_TAG=<tag>
ARG MINICONDA_TAG=24.7.1-0

# ---------------------------------------------------------------------------
# Stage 1: stage the application source and strip local secrets.
# Deleting files in a later layer of the SAME image does not remove them from
# the layer that COPY created, so the scrub happens in this throwaway stage
# and only the cleaned tree is copied into the runtime image below.
# ---------------------------------------------------------------------------
FROM continuumio/miniconda3:${MINICONDA_TAG} AS source
COPY pagepiper/ /src/pagepiper/
RUN rm -f /src/pagepiper/.env /src/pagepiper/config/llm.yaml

# ---------------------------------------------------------------------------
# Stage 2: runtime image
# ---------------------------------------------------------------------------
FROM continuumio/miniconda3:${MINICONDA_TAG}

WORKDIR /app

# System deps for pytesseract (OCR) and pdfplumber
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install circuitforge-core from sibling directory (compose sets context: ..)
# NOTE(review): this installs into the base env, while the service itself runs
# in the pagepiper env (see CMD) — confirm the base-env install is still needed.
COPY circuitforge-core/ ./circuitforge-core/
RUN conda run -n base pip install --no-cache-dir -e "./circuitforge-core[pdf,vector]"

# Create pagepiper conda env. Only environment.yml is copied first so this
# expensive layer stays cached until the dependency list changes; the conda
# package cache is purged in the same layer so it never lands in the image.
COPY pagepiper/environment.yml .
RUN conda env create -f environment.yml \
    && conda clean --all --yes

# Application source, already scrubbed of .env and config/llm.yaml in stage 1
COPY --from=source /src/pagepiper/ ./pagepiper/

# Install cf-core into pagepiper env + the app itself
RUN conda run -n pagepiper pip install --no-cache-dir -e "/app/circuitforge-core[pdf,vector]"
WORKDIR /app/pagepiper
RUN conda run -n pagepiper pip install --no-cache-dir -e .

EXPOSE 8521
# --no-capture-output lets uvicorn's stdout/stderr reach the container logs
CMD ["conda", "run", "--no-capture-output", "-n", "pagepiper", \
     "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8521"]
|
||||
13
compose.override.yml.example
Normal file
13
compose.override.yml.example
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Copy to compose.override.yml and fill in your values.
|
||||
# compose.override.yml is gitignored — never commit secrets.
|
||||
|
||||
services:
|
||||
api:
|
||||
environment:
|
||||
# Point to your local Ollama instance to unlock semantic search and RAG chat
|
||||
PAGEPIPER_OLLAMA_URL: "http://localhost:11434"
|
||||
PAGEPIPER_CHAT_MODEL: "mistral:7b"
|
||||
PAGEPIPER_EMBED_MODEL: "nomic-embed-text"
|
||||
volumes:
|
||||
# Override books directory if your PDFs are elsewhere
|
||||
- /path/to/your/pdfs:/books:ro
|
||||
21
compose.yml
Normal file
21
compose.yml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# pagepiper stack: the Python backend ("api") and the nginx-served SPA ("web").
services:
  api:
    # Context is the parent directory so the image build can also COPY the
    # sibling circuitforge-core checkout; the Dockerfile path is relative to it.
    build:
      dockerfile: pagepiper/Dockerfile
      context: ..
    env_file: .env
    restart: unless-stopped
    # NOTE(review): with host networking the image's CMD (uvicorn on
    # 0.0.0.0:8521) binds host port 8521 — the same host port the web service
    # publishes below ("8521:80"). Confirm which service is meant to own 8521;
    # changing it also affects the proxy_pass target in docker/web/nginx.conf.
    network_mode: host
    volumes:
      # Persistent application data
      - ./data:/app/pagepiper/data
      # PDF library, mounted read-only; falls back to ./books when
      # PAGEPIPER_BOOKS_DIR is unset or empty
      - ${PAGEPIPER_BOOKS_DIR:-./books}:/books:ro

  web:
    build:
      dockerfile: docker/web/Dockerfile
      context: .
    restart: unless-stopped
    depends_on:
      - api
    ports:
      # host 8521 -> nginx on container port 80
      - "8521:80"
|
||||
2
config/ingest.yaml
Normal file
2
config/ingest.yaml
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
ocr_min_words: 10 # Pages with fewer words from text layer → OCR fallback
|
||||
batch_size: 32 # Pages embedded per Ollama call (tune to your GPU VRAM)
|
||||
7
config/llm.yaml.example
Normal file
7
config/llm.yaml.example
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Copy to config/llm.yaml (gitignored) — or use .env / compose.override.yml instead.
|
||||
|
||||
provider: ollama
|
||||
base_url: "${PAGEPIPER_OLLAMA_URL}"
|
||||
chat_model: mistral:7b
|
||||
embedding_model: nomic-embed-text # ollama pull nomic-embed-text
|
||||
vector_store: sqlite_vec
|
||||
19
docker/web/Dockerfile
Normal file
19
docker/web/Dockerfile
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Stage 1: build Vue SPA
FROM node:20-alpine AS build
WORKDIR /app

# Manifests first, so the dependency layer stays cached until they change
COPY web/package*.json ./
RUN npm ci --prefer-offline
COPY web/ ./

# Build-time settings, exported into the environment of `npm run build`
# (presumably read by the Vite build — confirm against web/vite.config).
ARG VITE_BASE_URL=/
ARG VITE_API_BASE=
ENV VITE_BASE_URL=$VITE_BASE_URL \
    VITE_API_BASE=$VITE_API_BASE

RUN npm run build

# Stage 2: serve via nginx. The release line is pinned: a bare `nginx:alpine`
# tag floats across nginx releases and makes rebuilds non-reproducible.
FROM nginx:1.27-alpine
COPY docker/web/nginx.conf /etc/nginx/conf.d/default.conf
COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80
|
||||
17
docker/web/nginx.conf
Normal file
17
docker/web/nginx.conf
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Serves the built SPA and forwards /api/ requests to the backend.
server {
    listen 80;

    index index.html;
    root /usr/share/nginx/html;

    # API traffic goes to the FastAPI backend, passing along the original
    # Host header and the client address.
    # NOTE(review): compose.yml runs this container with a published port
    # (bridge networking), where `localhost` is this container itself and
    # nothing here listens on 8521 — confirm the upstream address is reachable
    # from inside the web container.
    location /api/ {
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header Host $host;
        proxy_pass http://localhost:8521;
    }

    # SPA fallback: anything that is not an existing file or directory is
    # answered with index.html so client-side routing can handle the path.
    location / {
        try_files $uri $uri/ /index.html;
    }
}
|
||||
19
environment.yml
Normal file
19
environment.yml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Conda environment for pagepiper (used by the Docker image and local dev).
name: pagepiper
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.11
  - pip
  - pip:
      # Web/API runtime — keep in step with [project].dependencies in pyproject.toml
      - fastapi>=0.110
      - uvicorn[standard]>=0.29
      # Listed in pyproject.toml but previously missing here, so an env created
      # from this file alone lacked it.
      - python-multipart>=0.0.9
      - rank-bm25>=0.2
      - PyYAML>=6.0
      - httpx>=0.27
      # PDF extraction / OCR / vector storage
      - pdfplumber>=0.11
      - pytesseract>=0.3
      - Pillow>=10.0
      - sqlite-vec>=0.1
      # Test tooling
      - pytest>=8.0
      - pytest-asyncio>=0.23
|
||||
50
manage.sh
Executable file
50
manage.sh
Executable file
|
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env bash
# manage.sh — convenience wrapper around `docker compose` for the pagepiper stack.
#
# Usage: ./manage.sh {start|stop|restart|status|logs [svc]|open|build|test}
set -euo pipefail

# Work from the script's own directory so the relative compose files and the
# tests/ path resolve regardless of where the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

SERVICE=pagepiper   # not referenced elsewhere in this script
WEB_PORT=8521
COMPOSE_FILE="compose.yml"

# Conda env used by `test`. environment.yml names the env "pagepiper";
# set PAGEPIPER_CONDA_ENV to run the tests in a differently named env.
CONDA_ENV="${PAGEPIPER_CONDA_ENV:-pagepiper}"

# Passing -f explicitly disables compose's automatic override discovery, so
# the local override file is added by hand when present. An array keeps each
# argument intact without relying on unquoted word splitting.
COMPOSE_ARGS=(-f "$COMPOSE_FILE")
if [[ -f "compose.override.yml" ]]; then
    COMPOSE_ARGS+=(-f "compose.override.yml")
fi

# Print the command synopsis and exit non-zero.
usage() {
    echo "Usage: $0 {start|stop|restart|status|logs [svc]|open|build|test}"
    exit 1
}

cmd="${1:-help}"
shift || true   # no-op when called without arguments

case "$cmd" in
    start)
        docker compose "${COMPOSE_ARGS[@]}" up -d --build
        echo "Pagepiper running → http://localhost:${WEB_PORT}"
        ;;
    stop)
        docker compose "${COMPOSE_ARGS[@]}" down
        ;;
    restart)
        docker compose "${COMPOSE_ARGS[@]}" down
        docker compose "${COMPOSE_ARGS[@]}" up -d --build
        echo "Pagepiper running → http://localhost:${WEB_PORT}"
        ;;
    status)
        docker compose "${COMPOSE_ARGS[@]}" ps
        ;;
    logs)
        # "$@" expands to nothing when no service is named; the previous
        # "${1:-}" handed compose an empty-string service name instead.
        docker compose "${COMPOSE_ARGS[@]}" logs -f "$@"
        ;;
    open)
        # xdg-open on Linux, falling back to macOS `open`
        xdg-open "http://localhost:${WEB_PORT}" 2>/dev/null || open "http://localhost:${WEB_PORT}"
        ;;
    build)
        docker compose "${COMPOSE_ARGS[@]}" build --no-cache
        ;;
    test)
        conda run -n "$CONDA_ENV" pytest tests/ -v
        ;;
    *)
        usage
        ;;
esac
|
||||
27
pyproject.toml
Normal file
27
pyproject.toml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
[build-system]
|
||||
requires = ["setuptools>=68", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "pagepiper"
|
||||
version = "0.1.0"
|
||||
description = "Self-hosted PDF library manager with RAG chat and page-level citations"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"fastapi>=0.110",
|
||||
"uvicorn[standard]>=0.29",
|
||||
"python-multipart>=0.0.9",
|
||||
"rank-bm25>=0.2",
|
||||
"PyYAML>=6.0",
|
||||
"httpx>=0.27",
|
||||
"circuitforge-core[pdf,vector]>=0.19.0",
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["app*"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
asyncio_mode = "auto"
|
||||
Loading…
Reference in a new issue