Compare commits

..

No commits in common. "006738f7b3401576a42a945e251d1b26c5b4878b" and "a7d9bd075ac5578b37298626debc41552f4365e1" have entirely different histories.

20 changed files with 111 additions and 4127 deletions

5
.gitignore vendored
View file

@ -22,8 +22,3 @@ config/user.yaml
config/.backup-* config/.backup-*
config/integrations/*.yaml config/integrations/*.yaml
!config/integrations/*.yaml.example !config/integrations/*.yaml.example
# companyScraper runtime artifacts
scrapers/.cache/
scrapers/.debug/
scrapers/raw_scrapes/

View file

@ -1,38 +0,0 @@
# Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
# Large image (~12-15 GB after build). Built once, cached on rebuilds.
# GPU strongly recommended. CPU fallback works but training is very slow.
#
# Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
# If your GPU requires a different CUDA version, change the FROM line and
# reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121).
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
WORKDIR /app
# Build tools needed by bitsandbytes CUDA kernels and unsloth.
# git is also required for the pip VCS install below; libgomp1 supplies the
# OpenMP runtime. apt lists are removed in the same layer to keep image size down.
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc g++ git libgomp1 \
&& rm -rf /var/lib/apt/lists/*
# Install training stack.
# unsloth detects CUDA version automatically from the base image.
# NOTE(review): unsloth is installed from git HEAD (unpinned), so rebuilds are
# not reproducible — consider pinning to a commit or release tag.
RUN pip install --no-cache-dir \
"unsloth @ git+https://github.com/unslothai/unsloth.git" \
"datasets>=2.18" "trl>=0.8" peft transformers \
"bitsandbytes>=0.43.0" accelerate sentencepiece \
requests pyyaml
# Bake the training entrypoint script and default config into the image.
COPY scripts/ /app/scripts/
COPY config/ /app/config/
# Unbuffered stdout/stderr so training progress logs stream live via `docker logs`.
ENV PYTHONUNBUFFERED=1
# Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
ENV CUDA_VISIBLE_DEVICES=0
# Runtime env vars injected by compose.yml:
# OLLAMA_URL — Ollama API base (default: http://ollama:11434)
# OLLAMA_MODELS_MOUNT — finetune container's mount path for ollama models volume
# OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
# DOCS_DIR — cover letters + training data root (default: /docs)
ENTRYPOINT ["python", "scripts/finetune_local.py"]

View file

@ -1,66 +1,36 @@
# Makefile — Peregrine convenience targets # Makefile — Peregrine convenience targets
# Usage: make <target> # Usage: make <target>
.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help .PHONY: setup preflight start stop restart logs test clean help
PROFILE ?= remote PROFILE ?= remote
PYTHON ?= python3 PYTHON ?= python3
# Auto-detect container engine: prefer docker compose, fall back to podman setup: ## Install dependencies (Docker, NVIDIA toolkit)
COMPOSE ?= $(shell \
command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 \
&& podman compose version >/dev/null 2>&1 \
&& echo "podman compose" \
|| echo "podman-compose"))
# GPU profiles require an overlay for NVIDIA device reservations.
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
COMPOSE_FILES := -f compose.yml
ifneq (,$(findstring podman,$(COMPOSE)))
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
endif
else
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
endif
endif
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
@bash setup.sh @bash setup.sh
preflight: ## Check ports + system resources; write .env preflight: ## Check ports + system resources; write .env
@$(PYTHON) scripts/preflight.py @$(PYTHON) scripts/preflight.py
start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu) start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d docker compose --profile $(PROFILE) up -d
stop: ## Stop all Peregrine services stop: ## Stop all Peregrine services
$(COMPOSE) down docker compose down
restart: preflight ## Preflight check then restart all services restart: preflight ## Preflight check then restart all services
$(COMPOSE) down && $(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d docker compose down && docker compose --profile $(PROFILE) up -d
logs: ## Tail app logs logs: ## Tail app logs
$(COMPOSE) logs -f app docker compose logs -f app
test: ## Run the test suite test: ## Run the test suite
@$(PYTHON) -m pytest tests/ -v $(PYTHON) -m pytest tests/ -v
prepare-training: ## Scan docs_dir for cover letters and build training JSONL
$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
finetune: ## Fine-tune your personal cover letter model (run prepare-training first)
@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE) clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
@echo "WARNING: This will delete all Peregrine containers and data." @echo "WARNING: This will delete all Peregrine containers and data."
@read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ] @read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ]
$(COMPOSE) down --rmi local --volumes docker compose down --rmi local --volumes
help: ## Show this help help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \

View file

@ -2,8 +2,6 @@
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)** **AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
> *"Don't be evil, for real and forever."*
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep. Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
Privacy-first, local-first. Your data never leaves your machine. Privacy-first, local-first. Your data never leaves your machine.

View file

@ -403,9 +403,9 @@ elif step == 5:
st.caption("Change only if services run on non-default ports or remote hosts.") st.caption("Change only if services run on non-default ports or remote hosts.")
svc = dict(saved_yaml.get("services", {})) svc = dict(saved_yaml.get("services", {}))
for svc_name, default_host, default_port in [ for svc_name, default_host, default_port in [
("ollama", "ollama", 11434), # Docker service name ("ollama", "localhost", 11434),
("vllm", "vllm", 8000), # Docker service name ("vllm", "localhost", 8000),
("searxng", "searxng", 8080), # Docker internal port (host-mapped: 8888) ("searxng", "localhost", 8888),
]: ]:
c1, c2 = st.columns([3, 1]) c1, c2 = st.columns([3, 1])
svc[f"{svc_name}_host"] = c1.text_input( svc[f"{svc_name}_host"] = c1.text_input(

View file

@ -1026,10 +1026,9 @@ with tab_finetune:
if ft_step == 1: if ft_step == 1:
st.markdown("**Step 1: Upload Cover Letters**") st.markdown("**Step 1: Upload Cover Letters**")
st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
uploaded = st.file_uploader( uploaded = st.file_uploader(
"Upload cover letters (.md or .txt)", "Upload cover letters (PDF, DOCX, or TXT)",
type=["md", "txt"], type=["pdf", "docx", "txt"],
accept_multiple_files=True, accept_multiple_files=True,
) )
if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"): if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
@ -1041,45 +1040,18 @@ with tab_finetune:
st.rerun() st.rerun()
elif ft_step == 2: elif ft_step == 2:
st.markdown("**Step 2: Extract Training Pairs**") st.markdown("**Step 2: Preview Training Pairs**")
import json as _json st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
import sqlite3 as _sqlite3
from scripts.db import DEFAULT_DB as _FT_DB
jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl" jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
# Show task status
_ft_conn = _sqlite3.connect(_FT_DB)
_ft_conn.row_factory = _sqlite3.Row
_ft_task = _ft_conn.execute(
"SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
).fetchone()
_ft_conn.close()
if _ft_task:
_ft_status = _ft_task["status"]
if _ft_status == "completed":
st.success(f"{_ft_task['error'] or 'Extraction complete'}")
elif _ft_status in ("running", "queued"):
st.info(f"{_ft_status.capitalize()}… refresh to check progress.")
elif _ft_status == "failed":
st.error(f"Extraction failed: {_ft_task['error']}")
if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
from scripts.task_runner import submit_task as _ft_submit
_ft_submit(_FT_DB, "prepare_training", 0)
st.info("Extracting in the background — refresh in a moment.")
st.rerun()
if jsonl_path.exists(): if jsonl_path.exists():
import json as _json
pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()] pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
st.caption(f"{len(pairs)} training pairs ready.") st.caption(f"{len(pairs)} training pairs extracted.")
for i, p in enumerate(pairs[:3]): for i, p in enumerate(pairs[:3]):
with st.expander(f"Pair {i+1}"): with st.expander(f"Pair {i+1}"):
st.text(p.get("output", p.get("input", ""))[:300]) st.text(p.get("input", "")[:300])
else: else:
st.caption("No training pairs yet — click Extract above.") st.warning("No training pairs found. Run `prepare_training_data.py` first.")
col_back, col_next = st.columns([1, 4]) col_back, col_next = st.columns([1, 4])
if col_back.button("← Back", key="ft_back2"): if col_back.button("← Back", key="ft_back2"):
st.session_state.ft_step = 1 st.session_state.ft_step = 1
@ -1089,45 +1061,13 @@ with tab_finetune:
st.rerun() st.rerun()
elif ft_step == 3: elif ft_step == 3:
st.markdown("**Step 3: Fine-Tune**") st.markdown("**Step 3: Train**")
st.slider("Epochs", 3, 20, 10, key="ft_epochs")
_ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower() if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
if _profile else "cover") st.info("Fine-tune queued as a background task. Check back in 30–60 minutes.")
_ft_model_name = f"{_ft_profile_name}-cover-writer" if st.button("← Back", key="ft_back3"):
st.info(
"Run the command below from your terminal. Training takes 3090 min on GPU "
"and registers the model automatically when complete."
)
st.code("make finetune PROFILE=single-gpu", language="bash")
st.caption(
f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
"Cover letter generation will use it automatically."
)
st.markdown("**Model status:**")
try:
import os as _os
import requests as _ft_req
_ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
_tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
if _tags.status_code == 200:
_model_names = [m["name"] for m in _tags.json().get("models", [])]
if any(_ft_model_name in m for m in _model_names):
st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
else:
st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
else:
st.caption("Ollama returned an unexpected response.")
except Exception:
st.caption("Could not reach Ollama — ensure services are running with `make start`.")
col_back, col_refresh = st.columns([1, 3])
if col_back.button("← Back", key="ft_back3"):
st.session_state.ft_step = 2 st.session_state.ft_step = 2
st.rerun() st.rerun()
if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
st.rerun()
# ── Developer tab ───────────────────────────────────────────────────────────── # ── Developer tab ─────────────────────────────────────────────────────────────
if _show_dev_tab: if _show_dev_tab:

View file

@ -1,46 +0,0 @@
# compose.gpu.yml — Docker NVIDIA GPU overlay
#
# Adds NVIDIA GPU reservations to Peregrine services.
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
#
# Prerequisites:
#   sudo nvidia-ctk runtime configure --runtime=docker
#   sudo systemctl restart docker
#
# GPU layout: ollama, vision, and finetune share device 0; vllm is pinned to
# device 1 (only active under the dual-gpu profile in the base compose file).
services:
ollama:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
vision:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
vllm:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
capabilities: [gpu]
finetune:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]

View file

@ -1,43 +0,0 @@
# compose.podman-gpu.yml — Podman GPU override
#
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
# when podman/podman-compose is detected, or manually:
#   podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
#
# Prerequisites:
#   sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
#   (requires nvidia-container-toolkit >= 1.14)
#
# Each service gets its GPU via a CDI device name (nvidia.com/gpu=N), and the
# empty `deploy.resources.reservations.devices: []` list is intended to neutralize
# the Docker-style nvidia reservation inherited from the base compose file,
# which Podman cannot honor.
services:
ollama:
devices:
- nvidia.com/gpu=0
deploy:
resources:
reservations:
devices: []
vision:
devices:
- nvidia.com/gpu=0
deploy:
resources:
reservations:
devices: []
vllm:
devices:
- nvidia.com/gpu=1
deploy:
resources:
reservations:
devices: []
finetune:
devices:
- nvidia.com/gpu=0
deploy:
resources:
reservations:
devices: []

View file

@ -12,7 +12,6 @@ services:
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs - ${DOCS_DIR:-~/Documents/JobSearch}:/docs
environment: environment:
- STAGING_DB=/app/data/staging.db - STAGING_DB=/app/data/staging.db
- DOCS_DIR=/docs
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-} - OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-} - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
@ -48,6 +47,18 @@ services:
profiles: [cpu, single-gpu, dual-gpu] profiles: [cpu, single-gpu, dual-gpu]
restart: unless-stopped restart: unless-stopped
ollama-gpu:
extends:
service: ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
profiles: [single-gpu, dual-gpu]
vision: vision:
build: build:
context: . context: .
@ -57,6 +68,13 @@ services:
environment: environment:
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2} - VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
- VISION_REVISION=${VISION_REVISION:-2025-01-09} - VISION_REVISION=${VISION_REVISION:-2025-01-09}
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
profiles: [single-gpu, dual-gpu] profiles: [single-gpu, dual-gpu]
restart: unless-stopped restart: unless-stopped
@ -74,24 +92,12 @@ services:
--enforce-eager --enforce-eager
--max-num-seqs 8 --max-num-seqs 8
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0} --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
capabilities: [gpu]
profiles: [dual-gpu] profiles: [dual-gpu]
restart: unless-stopped restart: unless-stopped
finetune:
build:
context: .
dockerfile: Dockerfile.finetune
volumes:
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
- ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
- ./config:/app/config
environment:
- DOCS_DIR=/docs
- OLLAMA_URL=http://ollama:11434
- OLLAMA_MODELS_MOUNT=/ollama-models
- OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
depends_on:
ollama:
condition: service_started
profiles: [cpu, single-gpu, dual-gpu]
restart: "no"

View file

@ -21,26 +21,26 @@ backends:
type: openai_compat type: openai_compat
ollama: ollama:
api_key: ollama api_key: ollama
base_url: http://ollama:11434/v1 base_url: http://localhost:11434/v1
enabled: true enabled: true
model: llama3.2:3b model: meghan-cover-writer:latest
supports_images: false supports_images: false
type: openai_compat type: openai_compat
ollama_research: ollama_research:
api_key: ollama api_key: ollama
base_url: http://ollama:11434/v1 base_url: http://localhost:11434/v1
enabled: true enabled: true
model: llama3.2:3b model: llama3.1:8b
supports_images: false supports_images: false
type: openai_compat type: openai_compat
vision_service: vision_service:
base_url: http://vision:8002 base_url: http://localhost:8002
enabled: true enabled: true
supports_images: true supports_images: true
type: vision_service type: vision_service
vllm: vllm:
api_key: '' api_key: ''
base_url: http://vllm:8000/v1 base_url: http://localhost:8000/v1
enabled: true enabled: true
model: __auto__ model: __auto__
supports_images: false supports_images: false

View file

@ -21,21 +21,21 @@ backends:
supports_images: false supports_images: false
ollama: ollama:
api_key: ollama api_key: ollama
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker base_url: http://localhost:11434/v1
enabled: true enabled: true
model: llama3.2:3b model: meghan-cover-writer:latest
type: openai_compat type: openai_compat
supports_images: false supports_images: false
ollama_research: ollama_research:
api_key: ollama api_key: ollama
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker base_url: http://localhost:11434/v1
enabled: true enabled: true
model: llama3.2:3b model: llama3.1:8b
type: openai_compat type: openai_compat
supports_images: false supports_images: false
vllm: vllm:
api_key: '' api_key: ''
base_url: http://vllm:8000/v1 # Docker service name; use localhost:8000 outside Docker base_url: http://localhost:8000/v1
enabled: true enabled: true
model: __auto__ model: __auto__
type: openai_compat type: openai_compat

View file

@ -44,15 +44,15 @@ inference_profile: "remote" # remote | cpu | single-gpu | dual-gpu
services: services:
streamlit_port: 8501 streamlit_port: 8501
ollama_host: ollama # Docker service name; use "localhost" if running outside Docker ollama_host: localhost
ollama_port: 11434 ollama_port: 11434
ollama_ssl: false ollama_ssl: false
ollama_ssl_verify: true ollama_ssl_verify: true
vllm_host: vllm # Docker service name; use "localhost" if running outside Docker vllm_host: localhost
vllm_port: 8000 vllm_port: 8000
vllm_ssl: false vllm_ssl: false
vllm_ssl_verify: true vllm_ssl_verify: true
searxng_host: searxng # Docker service name; use "localhost" if running outside Docker searxng_host: localhost
searxng_port: 8080 # internal Docker port; use 8888 for host-mapped access searxng_port: 8888
searxng_ssl: false searxng_ssl: false
searxng_ssl_verify: true searxng_ssl_verify: true

View file

@ -43,7 +43,7 @@ Unscheduled ideas and deferred features. Roughly grouped by area.
## Container Runtime ## Container Runtime
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install. - **Podman support** — Update `Makefile` to auto-detect `docker compose` vs `podman-compose` (e.g. `COMPOSE ?= $(shell command -v docker 2>/dev/null && echo "docker compose" || echo "podman-compose")`). Note in README that rootless Podman requires CDI GPU device spec (`nvidia.com/gpu=all`) instead of `runtime: nvidia` in `compose.yml`.
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean. - **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
--- ---

View file

@ -1,367 +0,0 @@
# CircuitForge License Server — Design Document
**Date:** 2026-02-25
**Status:** Approved — ready for implementation
---
## Goal
Build a self-hosted licensing server for Circuit Forge LLC products. v1 serves Peregrine; schema is multi-product from day one. Enforces free / paid / premium / ultra tier gates with offline-capable JWT validation, 30-day refresh cycle, 7-day grace period, seat tracking, usage telemetry, and a content violation flagging foundation.
## Architecture
```
┌─────────────────────────────────────────────────┐
│ circuitforge-license (Heimdall:8600) │
│ FastAPI + SQLite + RS256 JWT │
│ │
│ Public API (/v1/…): │
│ POST /v1/activate → issue JWT │
│ POST /v1/refresh → renew JWT │
│ POST /v1/deactivate → free a seat │
│ POST /v1/usage → record usage event │
│ POST /v1/flag → report violation │
│ │
│ Admin API (/admin/…, bearer token): │
│ POST/GET /admin/keys → CRUD keys │
│ DELETE /admin/keys/{id} → revoke │
│ GET /admin/activations → audit │
│ GET /admin/usage → telemetry │
│ GET/PATCH /admin/flags → flag review │
└─────────────────────────────────────────────────┘
↑ HTTPS via Caddy (license.circuitforge.com)
┌─────────────────────────────────────────────────┐
│ Peregrine (user's machine) │
│ scripts/license.py │
│ │
│ activate(key) → POST /v1/activate │
│ writes config/license.json │
│ verify_local() → validates JWT offline │
│ using embedded public key │
│ refresh_if_needed() → called on app startup │
│ effective_tier() → tier string for can_use() │
│ report_usage(…) → fire-and-forget telemetry │
│ report_flag(…) → fire-and-forget violation │
└─────────────────────────────────────────────────┘
```
**Key properties:**
- Peregrine verifies tier **offline** on every check — RS256 public key embedded at build time
- Network required only at activation and 30-day refresh
- Revoked keys stop working at next refresh cycle (≤30 day lag — acceptable for v1)
- `config/license.json` gitignored; missing = free tier
---
## Crypto: RS256 (asymmetric JWT)
- **Private key** — lives only on the license server (`keys/private.pem`, gitignored)
- **Public key** — committed to both the license server repo and Peregrine (`scripts/license_public_key.pem`)
- Peregrine can verify JWT authenticity without ever knowing the private key
- A stolen JWT cannot be forged without the private key
- Revocation: server refuses refresh; old JWT valid until expiry then grace period expires
**Key generation (one-time, on Heimdall):**
```bash
openssl genrsa -out keys/private.pem 2048
openssl rsa -in keys/private.pem -pubout -out keys/public.pem
# copy keys/public.pem → peregrine/scripts/license_public_key.pem
```
---
## Database Schema
```sql
CREATE TABLE license_keys (
id TEXT PRIMARY KEY, -- UUID
key_display TEXT UNIQUE NOT NULL, -- CFG-PRNG-XXXX-XXXX-XXXX
product TEXT NOT NULL, -- peregrine | falcon | osprey | …
tier TEXT NOT NULL, -- paid | premium | ultra
seats INTEGER DEFAULT 1,
valid_until TEXT, -- ISO date or NULL (perpetual)
revoked INTEGER DEFAULT 0,
customer_email TEXT, -- proper field, not buried in notes
source TEXT DEFAULT 'manual', -- manual | beta | promo | stripe
trial INTEGER DEFAULT 0, -- 1 = time-limited trial key
notes TEXT,
created_at TEXT NOT NULL
);
CREATE TABLE activations (
id TEXT PRIMARY KEY,
key_id TEXT NOT NULL REFERENCES license_keys(id),
machine_id TEXT NOT NULL, -- sha256(hostname + MAC)
app_version TEXT, -- Peregrine version at last refresh
platform TEXT, -- linux | macos | windows | docker
activated_at TEXT NOT NULL,
last_refresh TEXT NOT NULL,
deactivated_at TEXT -- NULL = still active
);
CREATE TABLE usage_events (
id TEXT PRIMARY KEY,
key_id TEXT NOT NULL REFERENCES license_keys(id),
machine_id TEXT NOT NULL,
product TEXT NOT NULL,
event_type TEXT NOT NULL, -- cover_letter_generated |
-- company_research | email_sync |
-- interview_prep | survey | etc.
metadata TEXT, -- JSON blob for context
created_at TEXT NOT NULL
);
CREATE TABLE flags (
id TEXT PRIMARY KEY,
key_id TEXT NOT NULL REFERENCES license_keys(id),
machine_id TEXT,
product TEXT NOT NULL,
flag_type TEXT NOT NULL, -- content_violation | tos_violation |
-- abuse | manual
details TEXT, -- JSON: prompt snippet, output excerpt
status TEXT DEFAULT 'open', -- open | reviewed | dismissed | actioned
created_at TEXT NOT NULL,
reviewed_at TEXT,
action_taken TEXT -- none | warned | revoked
);
CREATE TABLE audit_log (
id TEXT PRIMARY KEY,
entity_type TEXT NOT NULL, -- key | activation | flag
entity_id TEXT NOT NULL,
action TEXT NOT NULL, -- created | revoked | activated |
-- deactivated | flag_actioned
actor TEXT, -- admin identifier (future multi-admin)
details TEXT, -- JSON
created_at TEXT NOT NULL
);
```
**Flags scope (v1):** Schema and `POST /v1/flag` endpoint capture data. No admin enforcement UI in v1 — query DB directly. Build review UI in v2 when there's data to act on.
---
## JWT Payload
```json
{
"sub": "CFG-PRNG-A1B2-C3D4-E5F6",
"product": "peregrine",
"tier": "paid",
"seats": 2,
"machine": "a3f9c2…",
"notice": "Version 1.1 available — see circuitforge.com/update",
"iat": 1740000000,
"exp": 1742592000
}
```
`notice` is optional — set via a server config value; included in refresh responses so Peregrine can surface it as a banner. No DB table needed.
---
## Key Format
`CFG-PRNG-A1B2-C3D4-E5F6`
- `CFG` — Circuit Forge
- `PRNG` / `FLCN` / `OSPY` / … — 4-char product code
- Three random 4-char alphanumeric segments
- Human-readable, easy to copy/paste into a support email
---
## Endpoint Reference
| Method | Path | Auth | Purpose |
|--------|------|------|---------|
| POST | `/v1/activate` | none | Issue JWT for key + machine |
| POST | `/v1/refresh` | JWT bearer | Renew JWT before expiry |
| POST | `/v1/deactivate` | JWT bearer | Free a seat |
| POST | `/v1/usage` | JWT bearer | Record usage event (fire-and-forget) |
| POST | `/v1/flag` | JWT bearer | Report content/ToS violation |
| POST | `/admin/keys` | admin token | Create a new key |
| GET | `/admin/keys` | admin token | List all keys + activation counts |
| DELETE | `/admin/keys/{id}` | admin token | Revoke a key |
| GET | `/admin/activations` | admin token | Full activation audit |
| GET | `/admin/usage` | admin token | Usage breakdown per key/product/event |
| GET | `/admin/flags` | admin token | List flags (open by default) |
| PATCH | `/admin/flags/{id}` | admin token | Update flag status + action |
---
## Peregrine Client (`scripts/license.py`)
**Public API:**
```python
def activate(key: str) -> dict # POST /v1/activate, writes license.json
def verify_local() -> dict | None # validates JWT offline; None = free tier
def refresh_if_needed() -> None # silent; called on app startup
def effective_tier() -> str # "free"|"paid"|"premium"|"ultra"
def report_usage(event_type: str, # fire-and-forget; failures silently dropped
metadata: dict = {}) -> None
def report_flag(flag_type: str, # fire-and-forget
details: dict) -> None
```
**`effective_tier()` decision tree:**
```
license.json missing or unreadable → "free"
JWT signature invalid → "free"
JWT product != "peregrine" → "free"
JWT not expired → tier from payload
JWT expired, within grace period → tier from payload + show banner
JWT expired, grace period expired → "free" + show banner
```
**`config/license.json` (gitignored):**
```json
{
"jwt": "eyJ…",
"key_display": "CFG-PRNG-A1B2-C3D4-E5F6",
"tier": "paid",
"valid_until": "2026-03-27",
"machine_id": "a3f9c2…",
"last_refresh": "2026-02-25T12:00:00Z",
"grace_until": null
}
```
**Integration point in `tiers.py`:**
```python
def effective_tier(profile) -> str:
from scripts.license import effective_tier as _license_tier
if profile.dev_tier_override: # dev override still works in dev mode
return profile.dev_tier_override
return _license_tier()
```
**Settings License tab** (new tab in `app/pages/2_Settings.py`):
- Text input: enter license key → calls `activate()` → shows result
- If active: tier badge, key display string, expiry date, seat count
- Grace period: amber banner with days remaining
- "Deactivate this machine" button → `/v1/deactivate`, deletes `license.json`
---
## Deployment
**Repo:** `git.opensourcesolarpunk.com/pyr0ball/circuitforge-license` (private)
**Repo layout:**
```
circuitforge-license/
├── app/
│ ├── main.py # FastAPI app
│ ├── db.py # SQLite helpers, schema init
│ ├── models.py # Pydantic models
│ ├── crypto.py # RSA sign/verify helpers
│ └── routes/
│ ├── public.py # /v1/* endpoints
│ └── admin.py # /admin/* endpoints
├── data/ # SQLite DB (named volume)
├── keys/
│ ├── private.pem # gitignored
│ └── public.pem # committed
├── scripts/
│ └── issue-key.sh # curl wrapper for key issuance
├── tests/
├── Dockerfile
├── docker-compose.yml
├── .env.example
└── requirements.txt
```
**`docker-compose.yml` (on Heimdall):**
```yaml
services:
license:
build: .
restart: unless-stopped
ports:
- "127.0.0.1:8600:8600"
volumes:
- license_data:/app/data
- ./keys:/app/keys:ro
env_file: .env
volumes:
license_data:
```
**`.env` (gitignored):**
```
ADMIN_TOKEN=<long random string>
JWT_PRIVATE_KEY_PATH=/app/keys/private.pem
JWT_PUBLIC_KEY_PATH=/app/keys/public.pem
JWT_EXPIRY_DAYS=30
GRACE_PERIOD_DAYS=7
```
**Caddy block (add to Heimdall Caddyfile):**
```caddy
license.circuitforge.com {
reverse_proxy localhost:8600
}
```
---
## Admin Workflow (v1)
All operations via `curl` or `scripts/issue-key.sh`:
```bash
# Issue a key
./scripts/issue-key.sh --product peregrine --tier paid --seats 2 \
--email user@example.com --notes "Beta — manual payment 2026-02-25"
# → CFG-PRNG-A1B2-C3D4-E5F6 (email to customer)
# List all keys
curl https://license.circuitforge.com/admin/keys \
-H "Authorization: Bearer $ADMIN_TOKEN"
# Revoke a key
curl -X DELETE https://license.circuitforge.com/admin/keys/{id} \
-H "Authorization: Bearer $ADMIN_TOKEN"
```
---
## Testing Strategy
**License server:**
- pytest with in-memory SQLite and generated test keypair
- All endpoints tested: activate, refresh, deactivate, usage, flag, admin CRUD
- Seat limit enforcement, expiry, revocation all unit tested
**Peregrine client:**
- `verify_local()` tested with pre-signed test JWT using test keypair
- `activate()` / `refresh()` tested with `httpx` mocks
- `effective_tier()` tested across all states: valid, expired, grace, revoked, missing
**Integration smoke test:**
```bash
docker compose up -d
# create test key via admin API
# call /v1/activate with test key
# verify JWT signature with public key
# verify /v1/refresh extends expiry
```
---
## Decisions Log
| Decision | Rationale |
|----------|-----------|
| RS256 over HS256 | Public key embeddable in client; private key never leaves server |
| SQLite over Postgres | Matches Peregrine's SQLite-first philosophy; trivially backupable |
| 30-day JWT lifetime | Standard SaaS pattern; invisible to users in normal operation |
| 7-day grace period | Covers travel, network outages, server maintenance |
| Flags v1: capture only | No volume to justify review UI yet; add in v2 |
| No payment integration | Manual issuance until customer volume justifies automation |
| Multi-product schema | Adding a column now vs migrating a live DB later |
| Separate repo | License server is infrastructure, not part of Peregrine's BSL scope |

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -32,12 +32,7 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
# ── Config ──────────────────────────────────────────────────────────────────── # ── Config ────────────────────────────────────────────────────────────────────
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
# DOCS_DIR env var overrides user_profile when running inside Docker _docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
_docs_env = os.environ.get("DOCS_DIR", "")
_docs = Path(_docs_env) if _docs_env else (
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
)
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl" LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
OUTPUT_DIR = _docs / "training_data" / "finetune_output" OUTPUT_DIR = _docs / "training_data" / "finetune_output"
GGUF_DIR = _docs / "training_data" / "gguf" GGUF_DIR = _docs / "training_data" / "gguf"
@ -71,7 +66,7 @@ print(f"{'='*60}\n")
# ── Load dataset ────────────────────────────────────────────────────────────── # ── Load dataset ──────────────────────────────────────────────────────────────
if not LETTERS_JSONL.exists(): if not LETTERS_JSONL.exists():
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n" sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
"Run: make prepare-training (or: python scripts/prepare_training_data.py)") "Run: conda run -n job-seeker python scripts/prepare_training_data.py")
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()] records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
print(f"Loaded {len(records)} training examples.") print(f"Loaded {len(records)} training examples.")
@ -227,102 +222,35 @@ if not args.no_gguf and USE_UNSLOTH:
else: else:
gguf_path = None gguf_path = None
# ── Register with Ollama (auto) ──────────────────────────────────────────────── # ── Print next steps ──────────────────────────────────────────────────────────
print(f"\n{'='*60}")
def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool: print(" DONE — next steps to load into Ollama:")
""" print(f"{'='*60}")
Copy GGUF into the shared Ollama models volume and register via the API.
Works in two modes:
Containerised OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
translate the container path into Ollama's view of the file.
Local gguf_path is an absolute path Ollama can read directly.
"""
import shutil
import requests
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")
# ── Place GGUF where Ollama can read it ───────────────────────────────────
if models_mount and ollama_models_dir:
# Containerised: write into the shared volume; Ollama reads from its own mount.
dest_dir = Path(models_mount) / "custom"
dest_dir.mkdir(parents=True, exist_ok=True)
dest = dest_dir / gguf_path.name
if dest != gguf_path:
print(f"Copying GGUF → shared volume: {dest}")
shutil.copy2(gguf_path, dest)
ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
else:
# Local: pass the absolute path directly.
ollama_gguf = str(gguf_path.resolve())
modelfile_text = (
f"FROM {ollama_gguf}\n"
f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
f"PARAMETER temperature 0.7\n"
f"PARAMETER top_p 0.9\n"
f"PARAMETER num_ctx 32768\n"
)
# Write Modelfile to disk as a reference (useful for debugging)
(OUTPUT_DIR / "Modelfile").write_text(modelfile_text)
# ── Create via Ollama API ─────────────────────────────────────────────────
print(f"\nRegistering '{model_name}' with Ollama at {ollama_url}")
try:
r = requests.post(
f"{ollama_url}/api/create",
json={"name": model_name, "modelfile": modelfile_text},
timeout=300,
stream=True,
)
for line in r.iter_lines():
if line:
import json as _json
try:
msg = _json.loads(line).get("status", "")
except Exception:
msg = line.decode()
if msg:
print(f" {msg}")
if r.status_code != 200:
print(f" WARNING: Ollama returned HTTP {r.status_code}")
return False
except Exception as exc:
print(f" Ollama registration failed: {exc}")
print(f" Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
return False
# ── Update config/llm.yaml ────────────────────────────────────────────────
llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
if llm_yaml.exists():
try:
import yaml as _yaml
cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
if "backends" in cfg and "ollama" in cfg["backends"]:
cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
llm_yaml.write_text(
_yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
)
print(f" llm.yaml updated → ollama.model = {model_name}:latest")
except Exception as exc:
print(f" Could not update llm.yaml automatically: {exc}")
print(f"\n{'='*60}")
print(f" Model ready: {model_name}:latest")
print(f" Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
print(f"{'='*60}\n")
return True
if gguf_path and gguf_path.exists(): if gguf_path and gguf_path.exists():
_auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT) modelfile = OUTPUT_DIR / "Modelfile"
modelfile.write_text(f"""FROM {gguf_path}
SYSTEM \"\"\"
{SYSTEM_PROMPT}
\"\"\"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 32768
""")
print(f"\n1. Modelfile written to: {modelfile}")
print(f"\n2. Create the Ollama model:")
print(f" ollama create {OLLAMA_NAME} -f {modelfile}")
print(f"\n3. Test it:")
print(f" ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
print(f" then pick it in Settings → LLM Backends → Ollama → Model.")
else: else:
print(f"\n{'='*60}") print(f"\n Adapter only (no GGUF). To convert manually:")
print(" Adapter saved (no GGUF produced).") print(f" 1. Merge adapter:")
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.") print(f" conda run -n ogma python -c \"")
print(f" Adapter path: {adapter_path}") print(f" from peft import AutoPeftModelForCausalLM")
print(f"{'='*60}\n") print(f" m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
print(f" m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
print(f" 2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
print(f" 3. ollama create {OLLAMA_NAME} -f Modelfile")
print()

View file

@ -12,7 +12,6 @@ Usage:
""" """
import argparse import argparse
import json import json
import os
import re import re
import sys import sys
from pathlib import Path from pathlib import Path
@ -23,10 +22,7 @@ from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml" _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_docs_env = os.environ.get("DOCS_DIR", "") _docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
_docs = Path(_docs_env) if _docs_env else (
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
)
LETTERS_DIR = _docs LETTERS_DIR = _docs
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter") # Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"] LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
@ -81,16 +77,6 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
if p not in seen: if p not in seen:
seen.add(p) seen.add(p)
all_paths.append(p) all_paths.append(p)
# Also scan web-uploaded files (Settings → Fine-tune → Upload)
uploads_dir = letters_dir / "training_data" / "uploads"
if uploads_dir.exists():
for glob in ("*.md", "*.txt"):
for p in uploads_dir.glob(glob):
if p not in seen:
seen.add(p)
all_paths.append(p)
for path in sorted(all_paths): for path in sorted(all_paths):
text = path.read_text(encoding="utf-8", errors="ignore").strip() text = path.read_text(encoding="utf-8", errors="ignore").strip()
if not text or len(text) < 100: if not text or len(text) < 100:

View file

@ -243,17 +243,6 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
) )
return return
elif task_type == "prepare_training":
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
records = build_records()
write_jsonl(records, DEFAULT_OUTPUT)
n = len(records)
update_task_status(
db_path, task_id, "completed",
error=f"{n} training pair{'s' if n != 1 else ''} extracted",
)
return
else: else:
raise ValueError(f"Unknown task_type: {task_type!r}") raise ValueError(f"Unknown task_type: {task_type!r}")

127
setup.sh
View file

@ -64,35 +64,6 @@ install_git() {
success "git installed." success "git installed."
} }
# ── Podman detection ───────────────────────────────────────────────────────────
# If Podman is already present, skip Docker entirely and ensure podman-compose is available.
check_podman() {
if ! cmd_exists podman; then return 1; fi
success "Podman detected ($(podman --version)) — skipping Docker install."
# Ensure a compose provider is available
if podman compose version &>/dev/null 2>&1; then
success "podman compose available."
elif cmd_exists podman-compose; then
success "podman-compose available."
else
info "Installing podman-compose…"
case "$DISTRO_FAMILY" in
debian) $SUDO apt-get install -y podman-compose 2>/dev/null \
|| pip3 install --user podman-compose ;;
fedora) $SUDO dnf install -y podman-compose 2>/dev/null \
|| pip3 install --user podman-compose ;;
arch) $SUDO pacman -Sy --noconfirm podman-compose 2>/dev/null \
|| pip3 install --user podman-compose ;;
macos) brew install podman-compose 2>/dev/null \
|| pip3 install --user podman-compose ;;
esac
success "podman-compose installed."
fi
warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
warn " sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
return 0
}
# ── Docker ───────────────────────────────────────────────────────────────────── # ── Docker ─────────────────────────────────────────────────────────────────────
install_docker_linux_debian() { install_docker_linux_debian() {
$SUDO apt-get update -q $SUDO apt-get update -q
@ -168,27 +139,6 @@ check_compose() {
fi fi
} }
# ── Docker daemon health check ──────────────────────────────────────────────────
check_docker_running() {
if docker info &>/dev/null 2>&1; then
success "Docker daemon is running."
return
fi
warn "Docker daemon is not responding."
if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
info "Starting Docker service…"
$SUDO systemctl start docker 2>/dev/null || true
sleep 2
if docker info &>/dev/null 2>&1; then
success "Docker daemon started."
else
warn "Docker failed to start. Run: sudo systemctl start docker"
fi
elif [[ "$OS" == "Darwin" ]]; then
warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
fi
}
# ── NVIDIA Container Toolkit ─────────────────────────────────────────────────── # ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
install_nvidia_toolkit() { install_nvidia_toolkit() {
[[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support [[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support
@ -196,8 +146,8 @@ install_nvidia_toolkit() {
info "No NVIDIA GPU detected — skipping Container Toolkit." info "No NVIDIA GPU detected — skipping Container Toolkit."
return return
fi fi
if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null 2>&1; then if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
success "NVIDIA Container Toolkit already configured." success "NVIDIA Container Toolkit already working."
return return
fi fi
info "NVIDIA GPU detected. Installing Container Toolkit…" info "NVIDIA GPU detected. Installing Container Toolkit…"
@ -226,8 +176,6 @@ install_nvidia_toolkit() {
} }
# ── Environment setup ────────────────────────────────────────────────────────── # ── Environment setup ──────────────────────────────────────────────────────────
# Note: Ollama runs as a Docker container — the compose.yml ollama service
# handles model download automatically on first start (see docker/ollama/entrypoint.sh).
setup_env() { setup_env() {
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ ! -f "$SCRIPT_DIR/.env" ]]; then if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
@ -238,88 +186,29 @@ setup_env() {
fi fi
} }
# ── Model weights storage ───────────────────────────────────────────────────────
_update_env_key() {
# Portable in-place key=value update for .env files (Linux + macOS).
# Appends the key if not already present.
local file="$1" key="$2" val="$3"
awk -v k="$key" -v v="$val" '
BEGIN { found=0 }
$0 ~ ("^" k "=") { print k "=" v; found=1; next }
{ print }
END { if (!found) print k "=" v }
' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
}
configure_model_paths() {
local env_file
env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
# Skip prompts when stdin is not a terminal (e.g. curl | bash)
if [[ ! -t 0 ]]; then
info "Non-interactive — using default model paths from .env"
return
fi
echo ""
info "Model weights storage"
echo -e " AI models can be 230+ GB each. If you have a separate data drive,"
echo -e " point these at it now. Press Enter to keep the value shown in [brackets]."
echo ""
local current input
current="$(grep -E '^OLLAMA_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
[[ -z "$current" ]] && current="~/models/ollama"
read -rp " Ollama models dir [${current}]: " input || input=""
input="${input:-$current}"
input="${input/#\~/$HOME}"
mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
_update_env_key "$env_file" "OLLAMA_MODELS_DIR" "$input"
success "OLLAMA_MODELS_DIR=$input"
current="$(grep -E '^VLLM_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
[[ -z "$current" ]] && current="~/models/vllm"
read -rp " vLLM models dir [${current}]: " input || input=""
input="${input:-$current}"
input="${input/#\~/$HOME}"
mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
_update_env_key "$env_file" "VLLM_MODELS_DIR" "$input"
success "VLLM_MODELS_DIR=$input"
echo ""
}
# ── Main ─────────────────────────────────────────────────────────────────────── # ── Main ───────────────────────────────────────────────────────────────────────
main() { main() {
echo "" echo ""
echo -e "${BLUE}╔══════════════════════════════════════════════════════${NC}" echo -e "${BLUE}╔══════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ Peregrine — Dependency Installer ║${NC}" echo -e "${BLUE}║ Peregrine — Dependency Installer ║${NC}"
echo -e "${BLUE}║ by Circuit Forge LLC ║${NC}" echo -e "${BLUE}║ by Circuit Forge LLC ║${NC}"
echo -e "${BLUE}║ \"Don't be evil, for real and forever.\" ║${NC}" echo -e "${BLUE}╚══════════════════════════════════════════╝${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════╝${NC}"
echo "" echo ""
install_git install_git
# Podman takes precedence if already installed; otherwise install Docker
if ! check_podman; then
install_docker install_docker
check_docker_running
check_compose check_compose
install_nvidia_toolkit install_nvidia_toolkit
fi
setup_env setup_env
configure_model_paths
echo "" echo ""
success "All dependencies installed." success "All dependencies installed."
echo "" echo ""
echo -e " ${GREEN}Next steps:${NC}" echo -e " ${GREEN}Next steps:${NC}"
echo -e " 1. Start Peregrine:" echo -e " 1. Edit ${YELLOW}.env${NC} to set your preferred ports and model paths"
echo -e " ${YELLOW}make start${NC} # remote/API-only (no local GPU)" echo -e " 2. Start Peregrine:"
echo -e " ${YELLOW}make start PROFILE=cpu${NC} # local Ollama inference (CPU)" echo -e " ${YELLOW}docker compose --profile remote up -d${NC}"
echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you" echo -e " 3. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
echo "" echo ""
if groups "$USER" 2>/dev/null | grep -q docker; then if groups "$USER" 2>/dev/null | grep -q docker; then
true true