Compare commits

...

10 commits

Author SHA1 Message Date
006738f7b3 fix: fix dual-gpu port conflict + move GPU config to overlay files
- Remove ollama-gpu service (was colliding with ollama on port 11434)
- Strip inline deploy.resources GPU blocks from vision and vllm
- Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0),
  vision (GPU 0), vllm (GPU 1), finetune (GPU 0)
- Fix compose.podman-gpu.yml: rename ollama-gpu → ollama to match
  service name after removal of ollama-gpu
- Update Makefile: apply compose.gpu.yml for Docker + GPU profiles
  (was only applying podman-gpu.yml for Podman + GPU profiles)
2026-02-25 16:44:59 -08:00
dc4a08c063 feat: wire fine-tune UI end-to-end + harden setup.sh
- setup.sh: replace docker-image-based NVIDIA test with nvidia-ctk validate
  (faster, no 100MB pull, no daemon required); add check_docker_running()
  to auto-start the Docker service on Linux or warn on macOS
- prepare_training_data.py: also scan training_data/uploads/*.{md,txt}
  so web-uploaded letters are included in training data
- task_runner.py: add prepare_training task type (calls build_records +
  write_jsonl inline; reports pair count in task result)
- Settings fine-tune tab: Step 1 accepts .md/.txt uploads; Step 2 Extract
  button submits prepare_training background task + shows status; Step 3
  shows make finetune command + live Ollama model status poller
2026-02-25 16:31:53 -08:00
4d66c04d1e feat: containerize fine-tune pipeline (Dockerfile.finetune + make finetune)
- Dockerfile.finetune: PyTorch 2.3/CUDA 12.1 base + unsloth + training stack
- finetune_local.py: auto-register model via Ollama HTTP API after GGUF
  export; path-translate between finetune container mount and Ollama's view;
  update config/llm.yaml automatically; DOCS_DIR env override for Docker
- prepare_training_data.py: DOCS_DIR env override so make prepare-training
  works correctly inside the app container
- compose.yml: add finetune service (cpu/single-gpu/dual-gpu profiles);
  DOCS_DIR=/docs injected into app + finetune containers
- compose.podman-gpu.yml: CDI device override for finetune service
- Makefile: make prepare-training + make finetune targets
2026-02-25 16:22:48 -08:00
6c895b5a9b feat: prompt for model weights directory during install
Interactive prompt lets users with split-drive setups point Ollama and
vLLM model dirs at a dedicated storage drive. Reads current .env value
as default so re-runs are idempotent. Skips prompts in non-interactive
(piped) mode. Creates the target directory immediately and updates .env
in-place via portable awk (Linux + macOS). Also simplifies next-steps
output since model paths are now configured at install time.
2026-02-25 16:08:14 -08:00
1bcbff395d fix: repair beta installer path for Docker-first deployment
- llm.yaml + example: replace localhost URLs with Docker service names
  (ollama:11434, vllm:8000, vision:8002); replace personal model names
  (meghan-cover-writer, llama3.1:8b) with llama3.2:3b
- user.yaml.example: update service hosts to Docker names (ollama, vllm,
  searxng) and searxng port from 8888 (host-mapped) to 8080 (internal)
- wizard step 5: fix hardcoded localhost defaults — wizard runs inside
  Docker, so service name defaults are required for connection tests to pass
- scrapers/companyScraper.py: bundle scraper so Dockerfile COPY succeeds
- setup.sh: remove host Ollama install (conflicts with Docker Ollama on
  port 11434); Docker entrypoint handles model download automatically
- README + setup.sh banner: add Circuit Forge mission statement
2026-02-25 16:03:10 -08:00
f55f7b78fc feat: add Ollama install + service start + model pull to setup.sh 2026-02-25 15:42:56 -08:00
9a4db4e18c feat: Podman support — auto-detect COMPOSE, CDI GPU override, podman-compose in setup.sh 2026-02-25 15:36:36 -08:00
71e30be9b9 docs: fix license server paths — dev under CircuitForge/, live at /devl/ 2026-02-25 15:28:32 -08:00
23b0703485 docs: CircuitForge license server implementation plan (11 tasks) 2026-02-25 15:27:39 -08:00
0bc17a1d84 docs: CircuitForge license server design doc
RS256 JWT, FastAPI + SQLite, multi-product schema, offline-capable
client integration. Covers server, Peregrine client, deployment,
admin workflow, and testing strategy.
2026-02-25 15:21:07 -08:00
20 changed files with 4127 additions and 111 deletions

.gitignore (vendored, 5 changed lines)

@ -22,3 +22,8 @@ config/user.yaml
config/.backup-*
config/integrations/*.yaml
!config/integrations/*.yaml.example
# companyScraper runtime artifacts
scrapers/.cache/
scrapers/.debug/
scrapers/raw_scrapes/

Dockerfile.finetune (new file, 38 lines)

@ -0,0 +1,38 @@
# Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
# Large image (~12-15 GB after build). Built once, cached on rebuilds.
# GPU strongly recommended. CPU fallback works but training is very slow.
#
# Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
# If your GPU requires a different CUDA version, change the FROM line and
# reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121).
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
WORKDIR /app
# Build tools needed by bitsandbytes CUDA kernels and unsloth
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc g++ git libgomp1 \
&& rm -rf /var/lib/apt/lists/*
# Install training stack.
# unsloth detects CUDA version automatically from the base image.
RUN pip install --no-cache-dir \
"unsloth @ git+https://github.com/unslothai/unsloth.git" \
"datasets>=2.18" "trl>=0.8" peft transformers \
"bitsandbytes>=0.43.0" accelerate sentencepiece \
requests pyyaml
COPY scripts/ /app/scripts/
COPY config/ /app/config/
ENV PYTHONUNBUFFERED=1
# Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
ENV CUDA_VISIBLE_DEVICES=0
# Runtime env vars injected by compose.yml:
# OLLAMA_URL — Ollama API base (default: http://ollama:11434)
# OLLAMA_MODELS_MOUNT — finetune container's mount path for ollama models volume
# OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
# DOCS_DIR — cover letters + training data root (default: /docs)
ENTRYPOINT ["python", "scripts/finetune_local.py"]
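The two `OLLAMA_MODELS_*` variables above exist because the finetune container and the Ollama container mount the same volume at different paths. A minimal sketch of the translation (the helper name and logic are illustrative; the real code lives in `scripts/finetune_local.py`):

```python
import os
from pathlib import Path

def ollama_view_of(gguf_path: Path) -> Path:
    """Translate a path under the finetune container's mount into the path
    the Ollama container sees for the same shared volume (illustrative)."""
    mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")              # e.g. /ollama-models
    ollama_side = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")  # e.g. /root/.ollama
    if mount and ollama_side and str(gguf_path).startswith(mount):
        return Path(ollama_side) / gguf_path.relative_to(mount)
    return gguf_path  # local mode: Ollama reads the path directly
```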


@ -1,36 +1,66 @@
# Makefile — Peregrine convenience targets
# Usage: make <target>
.PHONY: setup preflight start stop restart logs test clean help
.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
PROFILE ?= remote
PYTHON ?= python3
setup: ## Install dependencies (Docker, NVIDIA toolkit)
# Auto-detect container engine: prefer docker compose, fall back to podman
COMPOSE ?= $(shell \
command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 \
&& podman compose version >/dev/null 2>&1 \
&& echo "podman compose" \
|| echo "podman-compose"))
# GPU profiles require an overlay for NVIDIA device reservations.
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
COMPOSE_FILES := -f compose.yml
ifneq (,$(findstring podman,$(COMPOSE)))
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
endif
else
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
endif
endif
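The detection and overlay-selection logic above can be sketched in Python (an illustrative mirror of the Make logic, not code from the repo):

```python
import shutil
import subprocess

def detect_compose() -> str:
    """Mirror of the Makefile's COMPOSE auto-detection (illustrative)."""
    def works(*cmd) -> bool:
        try:
            return subprocess.run([*cmd, "version"], capture_output=True).returncode == 0
        except FileNotFoundError:
            return False
    if shutil.which("docker") and works("docker", "compose"):
        return "docker compose"
    if shutil.which("podman") and works("podman", "compose"):
        return "podman compose"
    return "podman-compose"

def compose_files(compose: str, profile: str) -> list[str]:
    """Pick the GPU overlay matching the engine, like the ifneq blocks above."""
    files = ["-f", "compose.yml"]
    if "gpu" in profile:
        overlay = "compose.podman-gpu.yml" if "podman" in compose else "compose.gpu.yml"
        files += ["-f", overlay]
    return files
```

So `make start PROFILE=single-gpu` applies `compose.gpu.yml` under Docker and `compose.podman-gpu.yml` under any Podman frontend.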
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
@bash setup.sh
preflight: ## Check ports + system resources; write .env
@$(PYTHON) scripts/preflight.py
start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
docker compose --profile $(PROFILE) up -d
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
stop: ## Stop all Peregrine services
docker compose down
$(COMPOSE) down
restart: preflight ## Preflight check then restart all services
docker compose down && docker compose --profile $(PROFILE) up -d
$(COMPOSE) down && $(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
logs: ## Tail app logs
docker compose logs -f app
$(COMPOSE) logs -f app
test: ## Run the test suite
$(PYTHON) -m pytest tests/ -v
@$(PYTHON) -m pytest tests/ -v
prepare-training: ## Scan docs_dir for cover letters and build training JSONL
$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
finetune: ## Fine-tune your personal cover letter model (run prepare-training first)
@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
@echo "WARNING: This will delete all Peregrine containers and data."
@read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ]
docker compose down --rmi local --volumes
$(COMPOSE) down --rmi local --volumes
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \


@ -2,6 +2,8 @@
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
> *"Don't be evil, for real and forever."*
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
Privacy-first, local-first. Your data never leaves your machine.


@ -403,9 +403,9 @@ elif step == 5:
st.caption("Change only if services run on non-default ports or remote hosts.")
svc = dict(saved_yaml.get("services", {}))
for svc_name, default_host, default_port in [
("ollama", "localhost", 11434),
("vllm", "localhost", 8000),
("searxng", "localhost", 8888),
("ollama", "ollama", 11434), # Docker service name
("vllm", "vllm", 8000), # Docker service name
("searxng", "searxng", 8080), # Docker internal port (host-mapped: 8888)
]:
c1, c2 = st.columns([3, 1])
svc[f"{svc_name}_host"] = c1.text_input(


@ -1026,9 +1026,10 @@ with tab_finetune:
if ft_step == 1:
st.markdown("**Step 1: Upload Cover Letters**")
st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
uploaded = st.file_uploader(
"Upload cover letters (PDF, DOCX, or TXT)",
type=["pdf", "docx", "txt"],
"Upload cover letters (.md or .txt)",
type=["md", "txt"],
accept_multiple_files=True,
)
if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
@ -1040,18 +1041,45 @@ with tab_finetune:
st.rerun()
elif ft_step == 2:
st.markdown("**Step 2: Preview Training Pairs**")
st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
st.markdown("**Step 2: Extract Training Pairs**")
import json as _json
import sqlite3 as _sqlite3
from scripts.db import DEFAULT_DB as _FT_DB
jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
# Show task status
_ft_conn = _sqlite3.connect(_FT_DB)
_ft_conn.row_factory = _sqlite3.Row
_ft_task = _ft_conn.execute(
"SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
).fetchone()
_ft_conn.close()
if _ft_task:
_ft_status = _ft_task["status"]
if _ft_status == "completed":
st.success(f"{_ft_task['error'] or 'Extraction complete'}")
elif _ft_status in ("running", "queued"):
st.info(f"{_ft_status.capitalize()}… refresh to check progress.")
elif _ft_status == "failed":
st.error(f"Extraction failed: {_ft_task['error']}")
if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
from scripts.task_runner import submit_task as _ft_submit
_ft_submit(_FT_DB, "prepare_training", 0)
st.info("Extracting in the background — refresh in a moment.")
st.rerun()
if jsonl_path.exists():
import json as _json
pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
st.caption(f"{len(pairs)} training pairs extracted.")
st.caption(f"{len(pairs)} training pairs ready.")
for i, p in enumerate(pairs[:3]):
with st.expander(f"Pair {i+1}"):
st.text(p.get("input", "")[:300])
st.text(p.get("output", p.get("input", ""))[:300])
else:
st.warning("No training pairs found. Run `prepare_training_data.py` first.")
st.caption("No training pairs yet — click Extract above.")
col_back, col_next = st.columns([1, 4])
if col_back.button("← Back", key="ft_back2"):
st.session_state.ft_step = 1
@ -1061,13 +1089,45 @@ with tab_finetune:
st.rerun()
elif ft_step == 3:
st.markdown("**Step 3: Train**")
st.slider("Epochs", 3, 20, 10, key="ft_epochs")
if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
st.info("Fine-tune queued as a background task. Check back in 30-60 minutes.")
if st.button("← Back", key="ft_back3"):
st.markdown("**Step 3: Fine-Tune**")
_ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower()
if _profile else "cover")
_ft_model_name = f"{_ft_profile_name}-cover-writer"
st.info(
"Run the command below from your terminal. Training takes 30-90 min on GPU "
"and registers the model automatically when complete."
)
st.code("make finetune PROFILE=single-gpu", language="bash")
st.caption(
f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
"Cover letter generation will use it automatically."
)
st.markdown("**Model status:**")
try:
import os as _os
import requests as _ft_req
_ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
_tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
if _tags.status_code == 200:
_model_names = [m["name"] for m in _tags.json().get("models", [])]
if any(_ft_model_name in m for m in _model_names):
st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
else:
st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
else:
st.caption("Ollama returned an unexpected response.")
except Exception:
st.caption("Could not reach Ollama — ensure services are running with `make start`.")
col_back, col_refresh = st.columns([1, 3])
if col_back.button("← Back", key="ft_back3"):
st.session_state.ft_step = 2
st.rerun()
if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
st.rerun()
# ── Developer tab ─────────────────────────────────────────────────────────────
if _show_dev_tab:

compose.gpu.yml (new file, 46 lines)

@ -0,0 +1,46 @@
# compose.gpu.yml — Docker NVIDIA GPU overlay
#
# Adds NVIDIA GPU reservations to Peregrine services.
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
#
# Prerequisites:
# sudo nvidia-ctk runtime configure --runtime=docker
# sudo systemctl restart docker
#
services:
ollama:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
vision:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
vllm:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
capabilities: [gpu]
finetune:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]

compose.podman-gpu.yml (new file, 43 lines)

@ -0,0 +1,43 @@
# compose.podman-gpu.yml — Podman GPU override
#
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
# when podman/podman-compose is detected, or manually:
# podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
#
# Prerequisites:
# sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# (requires nvidia-container-toolkit >= 1.14)
#
services:
ollama:
devices:
- nvidia.com/gpu=0
deploy:
resources:
reservations:
devices: []
vision:
devices:
- nvidia.com/gpu=0
deploy:
resources:
reservations:
devices: []
vllm:
devices:
- nvidia.com/gpu=1
deploy:
resources:
reservations:
devices: []
finetune:
devices:
- nvidia.com/gpu=0
deploy:
resources:
reservations:
devices: []


@ -12,6 +12,7 @@ services:
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
environment:
- STAGING_DB=/app/data/staging.db
- DOCS_DIR=/docs
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
@ -47,18 +48,6 @@ services:
profiles: [cpu, single-gpu, dual-gpu]
restart: unless-stopped
ollama-gpu:
extends:
service: ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
profiles: [single-gpu, dual-gpu]
vision:
build:
context: .
@ -68,13 +57,6 @@ services:
environment:
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
profiles: [single-gpu, dual-gpu]
restart: unless-stopped
@ -92,12 +74,24 @@ services:
--enforce-eager
--max-num-seqs 8
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
capabilities: [gpu]
profiles: [dual-gpu]
restart: unless-stopped
finetune:
build:
context: .
dockerfile: Dockerfile.finetune
volumes:
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
- ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
- ./config:/app/config
environment:
- DOCS_DIR=/docs
- OLLAMA_URL=http://ollama:11434
- OLLAMA_MODELS_MOUNT=/ollama-models
- OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
depends_on:
ollama:
condition: service_started
profiles: [cpu, single-gpu, dual-gpu]
restart: "no"


@ -21,26 +21,26 @@ backends:
type: openai_compat
ollama:
api_key: ollama
base_url: http://localhost:11434/v1
base_url: http://ollama:11434/v1
enabled: true
model: meghan-cover-writer:latest
model: llama3.2:3b
supports_images: false
type: openai_compat
ollama_research:
api_key: ollama
base_url: http://localhost:11434/v1
base_url: http://ollama:11434/v1
enabled: true
model: llama3.1:8b
model: llama3.2:3b
supports_images: false
type: openai_compat
vision_service:
base_url: http://localhost:8002
base_url: http://vision:8002
enabled: true
supports_images: true
type: vision_service
vllm:
api_key: ''
base_url: http://localhost:8000/v1
base_url: http://vllm:8000/v1
enabled: true
model: __auto__
supports_images: false


@ -21,21 +21,21 @@ backends:
supports_images: false
ollama:
api_key: ollama
base_url: http://localhost:11434/v1
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker
enabled: true
model: meghan-cover-writer:latest
model: llama3.2:3b
type: openai_compat
supports_images: false
ollama_research:
api_key: ollama
base_url: http://localhost:11434/v1
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker
enabled: true
model: llama3.1:8b
model: llama3.2:3b
type: openai_compat
supports_images: false
vllm:
api_key: ''
base_url: http://localhost:8000/v1
base_url: http://vllm:8000/v1 # Docker service name; use localhost:8000 outside Docker
enabled: true
model: __auto__
type: openai_compat


@ -44,15 +44,15 @@ inference_profile: "remote" # remote | cpu | single-gpu | dual-gpu
services:
streamlit_port: 8501
ollama_host: localhost
ollama_host: ollama # Docker service name; use "localhost" if running outside Docker
ollama_port: 11434
ollama_ssl: false
ollama_ssl_verify: true
vllm_host: localhost
vllm_host: vllm # Docker service name; use "localhost" if running outside Docker
vllm_port: 8000
vllm_ssl: false
vllm_ssl_verify: true
searxng_host: localhost
searxng_port: 8888
searxng_host: searxng # Docker service name; use "localhost" if running outside Docker
searxng_port: 8080 # internal Docker port; use 8888 for host-mapped access
searxng_ssl: false
searxng_ssl_verify: true


@ -43,7 +43,7 @@ Unscheduled ideas and deferred features. Roughly grouped by area.
## Container Runtime
- **Podman support** — Update `Makefile` to auto-detect `docker compose` vs `podman-compose` (e.g. `COMPOSE ?= $(shell command -v docker 2>/dev/null && echo "docker compose" || echo "podman-compose")`). Note in README that rootless Podman requires CDI GPU device spec (`nvidia.com/gpu=all`) instead of `runtime: nvidia` in `compose.yml`.
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
---


@ -0,0 +1,367 @@
# CircuitForge License Server — Design Document
**Date:** 2026-02-25
**Status:** Approved — ready for implementation
---
## Goal
Build a self-hosted licensing server for Circuit Forge LLC products. v1 serves Peregrine, but the schema is multi-product from day one. It enforces free / paid / premium / ultra tier gates with offline-capable JWT validation, a 30-day refresh cycle, a 7-day grace period, seat tracking, usage telemetry, and a foundation for content-violation flagging.
## Architecture
```
┌─────────────────────────────────────────────────┐
│ circuitforge-license (Heimdall:8600) │
│ FastAPI + SQLite + RS256 JWT │
│ │
│ Public API (/v1/…): │
│ POST /v1/activate → issue JWT │
│ POST /v1/refresh → renew JWT │
│ POST /v1/deactivate → free a seat │
│ POST /v1/usage → record usage event │
│ POST /v1/flag → report violation │
│ │
│ Admin API (/admin/…, bearer token): │
│ POST/GET /admin/keys → CRUD keys │
│ DELETE /admin/keys/{id} → revoke │
│ GET /admin/activations → audit │
│ GET /admin/usage → telemetry │
│ GET/PATCH /admin/flags → flag review │
└─────────────────────────────────────────────────┘
↑ HTTPS via Caddy (license.circuitforge.com)
┌─────────────────────────────────────────────────┐
│ Peregrine (user's machine) │
│ scripts/license.py │
│ │
│ activate(key) → POST /v1/activate │
│ writes config/license.json │
│ verify_local() → validates JWT offline │
│ using embedded public key │
│ refresh_if_needed() → called on app startup │
│ effective_tier() → tier string for can_use() │
│ report_usage(…) → fire-and-forget telemetry │
│ report_flag(…) → fire-and-forget violation │
└─────────────────────────────────────────────────┘
```
**Key properties:**
- Peregrine verifies tier **offline** on every check — RS256 public key embedded at build time
- Network required only at activation and 30-day refresh
- Revoked keys stop working at next refresh cycle (≤30 day lag — acceptable for v1)
- `config/license.json` gitignored; missing = free tier
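To illustrate the 30-day refresh cycle, a hedged sketch of the startup-time check; the 3-day refresh margin and the exact parsing of `last_refresh` are assumptions, not specified by the design:

```python
import json
from datetime import datetime, timedelta, timezone
from pathlib import Path

JWT_LIFETIME = timedelta(days=30)
REFRESH_MARGIN = timedelta(days=3)   # assumed margin; not specified in the doc

def needs_refresh(license_path: Path, now: datetime = None) -> bool:
    """Startup check: hit /v1/refresh only when the 30-day JWT is near expiry.
    Reads the `last_refresh` field from config/license.json (sketch)."""
    now = now or datetime.now(timezone.utc)
    try:
        data = json.loads(license_path.read_text())
        last = datetime.fromisoformat(data["last_refresh"].replace("Z", "+00:00"))
    except (OSError, ValueError, KeyError):
        return False  # missing/unreadable license means free tier, nothing to refresh
    return now >= last + JWT_LIFETIME - REFRESH_MARGIN
```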
---
## Crypto: RS256 (asymmetric JWT)
- **Private key** — lives only on the license server (`keys/private.pem`, gitignored)
- **Public key** — committed to both the license server repo and Peregrine (`scripts/license_public_key.pem`)
- Peregrine can verify JWT authenticity without ever knowing the private key
- A stolen JWT cannot be forged without the private key
- Revocation: the server refuses the next refresh; the old JWT stays valid until expiry, and once the grace period also runs out the client falls back to the free tier
**Key generation (one-time, on Heimdall):**
```bash
openssl genrsa -out keys/private.pem 2048
openssl rsa -in keys/private.pem -pubout -out keys/public.pem
# copy keys/public.pem → peregrine/scripts/license_public_key.pem
```
---
## Database Schema
```sql
CREATE TABLE license_keys (
id TEXT PRIMARY KEY, -- UUID
key_display TEXT UNIQUE NOT NULL, -- CFG-PRNG-XXXX-XXXX-XXXX
product TEXT NOT NULL, -- peregrine | falcon | osprey | …
tier TEXT NOT NULL, -- paid | premium | ultra
seats INTEGER DEFAULT 1,
valid_until TEXT, -- ISO date or NULL (perpetual)
revoked INTEGER DEFAULT 0,
customer_email TEXT, -- proper field, not buried in notes
source TEXT DEFAULT 'manual', -- manual | beta | promo | stripe
trial INTEGER DEFAULT 0, -- 1 = time-limited trial key
notes TEXT,
created_at TEXT NOT NULL
);
CREATE TABLE activations (
id TEXT PRIMARY KEY,
key_id TEXT NOT NULL REFERENCES license_keys(id),
machine_id TEXT NOT NULL, -- sha256(hostname + MAC)
app_version TEXT, -- Peregrine version at last refresh
platform TEXT, -- linux | macos | windows | docker
activated_at TEXT NOT NULL,
last_refresh TEXT NOT NULL,
deactivated_at TEXT -- NULL = still active
);
CREATE TABLE usage_events (
id TEXT PRIMARY KEY,
key_id TEXT NOT NULL REFERENCES license_keys(id),
machine_id TEXT NOT NULL,
product TEXT NOT NULL,
event_type TEXT NOT NULL, -- cover_letter_generated |
-- company_research | email_sync |
-- interview_prep | survey | etc.
metadata TEXT, -- JSON blob for context
created_at TEXT NOT NULL
);
CREATE TABLE flags (
id TEXT PRIMARY KEY,
key_id TEXT NOT NULL REFERENCES license_keys(id),
machine_id TEXT,
product TEXT NOT NULL,
flag_type TEXT NOT NULL, -- content_violation | tos_violation |
-- abuse | manual
details TEXT, -- JSON: prompt snippet, output excerpt
status TEXT DEFAULT 'open', -- open | reviewed | dismissed | actioned
created_at TEXT NOT NULL,
reviewed_at TEXT,
action_taken TEXT -- none | warned | revoked
);
CREATE TABLE audit_log (
id TEXT PRIMARY KEY,
entity_type TEXT NOT NULL, -- key | activation | flag
entity_id TEXT NOT NULL,
action TEXT NOT NULL, -- created | revoked | activated |
-- deactivated | flag_actioned
actor TEXT, -- admin identifier (future multi-admin)
details TEXT, -- JSON
created_at TEXT NOT NULL
);
```
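As a sanity check of the schema, a minimal sketch of key issuance against `license_keys` using the stdlib (`issue_key` is an illustrative helper, not the server's actual code):

```python
import sqlite3
import uuid
from datetime import datetime, timezone

# Copied from the license_keys DDL above
LICENSE_KEYS_DDL = """
CREATE TABLE license_keys (
  id TEXT PRIMARY KEY,
  key_display TEXT UNIQUE NOT NULL,
  product TEXT NOT NULL,
  tier TEXT NOT NULL,
  seats INTEGER DEFAULT 1,
  valid_until TEXT,
  revoked INTEGER DEFAULT 0,
  customer_email TEXT,
  source TEXT DEFAULT 'manual',
  trial INTEGER DEFAULT 0,
  notes TEXT,
  created_at TEXT NOT NULL
)"""

def issue_key(conn, key_display, product, tier, seats=1):
    """Insert a new key row; defaults (revoked=0, source='manual') apply."""
    conn.execute(
        "INSERT INTO license_keys (id, key_display, product, tier, seats, created_at) "
        "VALUES (?, ?, ?, ?, ?, ?)",
        (str(uuid.uuid4()), key_display, product, tier, seats,
         datetime.now(timezone.utc).isoformat()),
    )
```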
**Flags scope (v1):** Schema and `POST /v1/flag` endpoint capture data. No admin enforcement UI in v1 — query DB directly. Build review UI in v2 when there's data to act on.
---
## JWT Payload
```json
{
"sub": "CFG-PRNG-A1B2-C3D4-E5F6",
"product": "peregrine",
"tier": "paid",
"seats": 2,
"machine": "a3f9c2…",
"notice": "Version 1.1 available — see circuitforge.com/update",
"iat": 1740000000,
"exp": 1742592000
}
```
`notice` is optional — set via a server config value; included in refresh responses so Peregrine can surface it as a banner. No DB table needed.
---
## Key Format
`CFG-PRNG-A1B2-C3D4-E5F6`
- `CFG` — Circuit Forge
- `PRNG` / `FLCN` / `OSPY` / … — 4-char product code
- Three random 4-char alphanumeric segments
- Human-readable, easy to copy/paste into a support email
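A sketch of a generator for this format (illustrative; the server presumably also retries on uniqueness collisions against `license_keys.key_display`):

```python
import secrets
import string

ALPHABET = string.ascii_uppercase + string.digits  # 4-char alphanumeric segments

def generate_key(product_code: str = "PRNG") -> str:
    """Build a key like CFG-PRNG-A1B2-C3D4-E5F6 (sketch)."""
    segments = ["".join(secrets.choice(ALPHABET) for _ in range(4)) for _ in range(3)]
    return "-".join(["CFG", product_code, *segments])
```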
---
## Endpoint Reference
| Method | Path | Auth | Purpose |
|--------|------|------|---------|
| POST | `/v1/activate` | none | Issue JWT for key + machine |
| POST | `/v1/refresh` | JWT bearer | Renew JWT before expiry |
| POST | `/v1/deactivate` | JWT bearer | Free a seat |
| POST | `/v1/usage` | JWT bearer | Record usage event (fire-and-forget) |
| POST | `/v1/flag` | JWT bearer | Report content/ToS violation |
| POST | `/admin/keys` | admin token | Create a new key |
| GET | `/admin/keys` | admin token | List all keys + activation counts |
| DELETE | `/admin/keys/{id}` | admin token | Revoke a key |
| GET | `/admin/activations` | admin token | Full activation audit |
| GET | `/admin/usage` | admin token | Usage breakdown per key/product/event |
| GET | `/admin/flags` | admin token | List flags (open by default) |
| PATCH | `/admin/flags/{id}` | admin token | Update flag status + action |
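A sketch of what a client activation request body might look like; the `machine_id` derivation follows the schema comment (`sha256(hostname + MAC)`), while the other field names are assumptions since the doc does not pin the request schema:

```python
import hashlib
import socket
import uuid

def machine_id() -> str:
    """sha256(hostname + MAC), per the comment in the activations schema."""
    mac = f"{uuid.getnode():012x}"
    return hashlib.sha256((socket.gethostname() + mac).encode()).hexdigest()

def activate_payload(key: str) -> dict:
    # "key" and "product" are guessed field names; machine_id is from the schema
    return {"key": key, "machine_id": machine_id(), "product": "peregrine"}
```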
---
## Peregrine Client (`scripts/license.py`)
**Public API:**
```python
def activate(key: str) -> dict # POST /v1/activate, writes license.json
def verify_local() -> dict | None # validates JWT offline; None = free tier
def refresh_if_needed() -> None # silent; called on app startup
def effective_tier() -> str # "free"|"paid"|"premium"|"ultra"
def report_usage(event_type: str, # fire-and-forget; failures silently dropped
metadata: dict = {}) -> None
def report_flag(flag_type: str, # fire-and-forget
details: dict) -> None
```
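The fire-and-forget semantics of `report_usage` can be sketched as a daemon thread that swallows all failures (illustrative; the endpoint URL and payload shape are assumptions):

```python
import threading

USAGE_URL = "https://license.circuitforge.com/v1/usage"  # assumed endpoint

def report_usage(event_type, metadata=None):
    """Fire-and-forget: POST from a daemon thread and drop failures silently,
    so telemetry can never block or crash the app (sketch)."""
    def _send():
        try:
            import requests  # imported lazily; an import failure is swallowed too
            requests.post(USAGE_URL,
                          json={"event_type": event_type, "metadata": metadata or {}},
                          timeout=2)
        except Exception:
            pass  # failures are silently dropped by design
    threading.Thread(target=_send, daemon=True).start()
```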
**`effective_tier()` decision tree:**
```
license.json missing or unreadable → "free"
JWT signature invalid → "free"
JWT product != "peregrine" → "free"
JWT not expired → tier from payload
JWT expired, within grace period → tier from payload + show banner
JWT expired, grace period expired → "free" + show banner
```
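The tree above maps directly to a small function; a sketch in which signature verification and grace-period bookkeeping are assumed to happen elsewhere:

```python
import time

def effective_tier(payload, sig_valid, grace_until=None, now=None):
    """Sketch of the decision tree above. `payload` is the decoded JWT claims
    (None when license.json is missing or unreadable)."""
    now = now or time.time()
    if payload is None or not sig_valid:
        return "free"
    if payload.get("product") != "peregrine":
        return "free"
    if now < payload.get("exp", 0):
        return payload.get("tier", "free")       # JWT not expired
    if grace_until is not None and now < grace_until:
        return payload.get("tier", "free")       # expired but within grace (banner)
    return "free"                                # grace exhausted (banner)
```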
**`config/license.json` (gitignored):**
```json
{
"jwt": "eyJ…",
"key_display": "CFG-PRNG-A1B2-C3D4-E5F6",
"tier": "paid",
"valid_until": "2026-03-27",
"machine_id": "a3f9c2…",
"last_refresh": "2026-02-25T12:00:00Z",
"grace_until": null
}
```
**Integration point in `tiers.py`:**
```python
def effective_tier(profile) -> str:
from scripts.license import effective_tier as _license_tier
if profile.dev_tier_override: # dev override still works in dev mode
return profile.dev_tier_override
return _license_tier()
```
**Settings License tab** (new tab in `app/pages/2_Settings.py`):
- Text input: enter license key → calls `activate()` → shows result
- If active: tier badge, key display string, expiry date, seat count
- Grace period: amber banner with days remaining
- "Deactivate this machine" button → `/v1/deactivate`, deletes `license.json`
---
## Deployment
**Repo:** `git.opensourcesolarpunk.com/pyr0ball/circuitforge-license` (private)
**Repo layout:**
```
circuitforge-license/
├── app/
│ ├── main.py # FastAPI app
│ ├── db.py # SQLite helpers, schema init
│ ├── models.py # Pydantic models
│ ├── crypto.py # RSA sign/verify helpers
│ └── routes/
│ ├── public.py # /v1/* endpoints
│ └── admin.py # /admin/* endpoints
├── data/ # SQLite DB (named volume)
├── keys/
│ ├── private.pem # gitignored
│ └── public.pem # committed
├── scripts/
│ └── issue-key.sh # curl wrapper for key issuance
├── tests/
├── Dockerfile
├── docker-compose.yml
├── .env.example
└── requirements.txt
```
**`docker-compose.yml` (on Heimdall):**
```yaml
services:
license:
build: .
restart: unless-stopped
ports:
- "127.0.0.1:8600:8600"
volumes:
- license_data:/app/data
- ./keys:/app/keys:ro
env_file: .env
volumes:
license_data:
```
**`.env` (gitignored):**
```
ADMIN_TOKEN=<long random string>
JWT_PRIVATE_KEY_PATH=/app/keys/private.pem
JWT_PUBLIC_KEY_PATH=/app/keys/public.pem
JWT_EXPIRY_DAYS=30
GRACE_PERIOD_DAYS=7
```
**Caddy block (add to Heimdall Caddyfile):**
```caddy
license.circuitforge.com {
reverse_proxy localhost:8600
}
```
---
## Admin Workflow (v1)
All operations via `curl` or `scripts/issue-key.sh`:
```bash
# Issue a key
./scripts/issue-key.sh --product peregrine --tier paid --seats 2 \
--email user@example.com --notes "Beta — manual payment 2026-02-25"
# → CFG-PRNG-A1B2-C3D4-E5F6 (email to customer)
# List all keys
curl https://license.circuitforge.com/admin/keys \
-H "Authorization: Bearer $ADMIN_TOKEN"
# Revoke a key
curl -X DELETE https://license.circuitforge.com/admin/keys/{id} \
-H "Authorization: Bearer $ADMIN_TOKEN"
```
---
## Testing Strategy
**License server:**
- pytest with in-memory SQLite and generated test keypair
- All endpoints tested: activate, refresh, deactivate, usage, flag, admin CRUD
- Seat limit enforcement, expiry, revocation all unit tested
**Peregrine client:**
- `verify_local()` tested with pre-signed test JWT using test keypair
- `activate()` / `refresh()` tested with `httpx` mocks
- `effective_tier()` tested across all states: valid, expired, grace, revoked, missing
**Integration smoke test:**
```bash
docker compose up -d
# create test key via admin API
# call /v1/activate with test key
# verify JWT signature with public key
# verify /v1/refresh extends expiry
```
---
## Decisions Log
| Decision | Rationale |
|----------|-----------|
| RS256 over HS256 | Public key embeddable in client; private key never leaves server |
| SQLite over Postgres | Matches Peregrine's SQLite-first philosophy; trivially backupable |
| 30-day JWT lifetime | Standard SaaS pattern; invisible to users in normal operation |
| 7-day grace period | Covers travel, network outages, server maintenance |
| Flags v1: capture only | No volume to justify review UI yet; add in v2 |
| No payment integration | Manual issuance until customer volume justifies automation |
| Multi-product schema | Adding a column now vs migrating a live DB later |
| Separate repo | License server is infrastructure, not part of Peregrine's BSL scope |

File diff suppressed because it is too large

scrapers/companyScraper.py (new executable file, 1026 lines)

File diff suppressed because it is too large


@ -32,7 +32,12 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
# ── Config ────────────────────────────────────────────────────────────────────
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
# DOCS_DIR env var overrides user_profile when running inside Docker
_docs_env = os.environ.get("DOCS_DIR", "")
_docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
)
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
GGUF_DIR = _docs / "training_data" / "gguf"
@@ -66,7 +71,7 @@ print(f"{'='*60}\n")
# ── Load dataset ──────────────────────────────────────────────────────────────
if not LETTERS_JSONL.exists():
    sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
             "Run: conda run -n job-seeker python scripts/prepare_training_data.py")
             "Run: make prepare-training (or: python scripts/prepare_training_data.py)")
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
print(f"Loaded {len(records)} training examples.")
@@ -222,35 +227,102 @@ if not args.no_gguf and USE_UNSLOTH:
else:
    gguf_path = None
# ── Print next steps ──────────────────────────────────────────────────────────
print(f"\n{'='*60}")
print(" DONE — next steps to load into Ollama:")
print(f"{'='*60}")
# ── Register with Ollama (auto) ────────────────────────────────────────────────
def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy GGUF into the shared Ollama models volume and register via the API.

    Works in two modes:
      Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
        translate the container path into Ollama's view of the file.
      Local — gguf_path is an absolute path Ollama can read directly.
    """
    import shutil
    import requests

    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")

    # ── Place GGUF where Ollama can read it ───────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        if dest != gguf_path:
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())

    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        f"PARAMETER temperature 0.7\n"
        f"PARAMETER top_p 0.9\n"
        f"PARAMETER num_ctx 32768\n"
    )
    # Write Modelfile to disk as a reference (useful for debugging)
    (OUTPUT_DIR / "Modelfile").write_text(modelfile_text)

    # ── Create via Ollama API ─────────────────────────────────────────────
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url}")
    try:
        r = requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        )
        for line in r.iter_lines():
            if line:
                import json as _json
                try:
                    msg = _json.loads(line).get("status", "")
                except Exception:
                    msg = line.decode()
                if msg:
                    print(f"   {msg}")
        if r.status_code != 200:
            print(f"   WARNING: Ollama returned HTTP {r.status_code}")
            return False
    except Exception as exc:
        print(f"   Ollama registration failed: {exc}")
        print(f"   Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
        return False

    # ── Update config/llm.yaml ────────────────────────────────────────────
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                llm_yaml.write_text(
                    _yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
                )
                print(f"   llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            print(f"   Could not update llm.yaml automatically: {exc}")

    print(f"\n{'='*60}")
    print(f"  Model ready: {model_name}:latest")
    print(f"  Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
    print(f"{'='*60}\n")
    return True
if gguf_path and gguf_path.exists():
    modelfile = OUTPUT_DIR / "Modelfile"
    modelfile.write_text(f"""FROM {gguf_path}
SYSTEM \"\"\"
{SYSTEM_PROMPT}
\"\"\"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 32768
""")
    print(f"\n1. Modelfile written to: {modelfile}")
    print(f"\n2. Create the Ollama model:")
    print(f"   ollama create {OLLAMA_NAME} -f {modelfile}")
    print(f"\n3. Test it:")
    print(f"   ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
    print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
    print(f"   then pick it in Settings → LLM Backends → Ollama → Model.")
    _auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
else:
    print(f"\n  Adapter only (no GGUF). To convert manually:")
    print(f"  1. Merge adapter:")
    print(f"     conda run -n ogma python -c \"")
    print(f"     from peft import AutoPeftModelForCausalLM")
    print(f"     m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
    print(f"     m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
    print(f"  2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
    print(f"  3. ollama create {OLLAMA_NAME} -f Modelfile")
    print()
    print(f"\n{'='*60}")
    print("  Adapter saved (no GGUF produced).")
    print(f"  Re-run without --no-gguf to generate a GGUF for Ollama registration.")
    print(f"  Adapter path: {adapter_path}")
    print(f"{'='*60}\n")

scripts/prepare_training_data.py

@@ -12,6 +12,7 @@ Usage:
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
@@ -22,7 +23,10 @@ from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
_docs_env = os.environ.get("DOCS_DIR", "")
_docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
)
LETTERS_DIR = _docs
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
@@ -77,6 +81,16 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
            if p not in seen:
                seen.add(p)
                all_paths.append(p)
    # Also scan web-uploaded files (Settings → Fine-tune → Upload)
    uploads_dir = letters_dir / "training_data" / "uploads"
    if uploads_dir.exists():
        for glob in ("*.md", "*.txt"):
            for p in uploads_dir.glob(glob):
                if p not in seen:
                    seen.add(p)
                    all_paths.append(p)
    for path in sorted(all_paths):
        text = path.read_text(encoding="utf-8", errors="ignore").strip()
        if not text or len(text) < 100:

task_runner.py

@@ -243,6 +243,17 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
        )
        return
    elif task_type == "prepare_training":
        from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
        records = build_records()
        write_jsonl(records, DEFAULT_OUTPUT)
        n = len(records)
        update_task_status(
            db_path, task_id, "completed",
            error=f"{n} training pair{'s' if n != 1 else ''} extracted",
        )
        return
    else:
        raise ValueError(f"Unknown task_type: {task_type!r}")

137
setup.sh

@@ -64,6 +64,35 @@ install_git() {
    success "git installed."
}
# ── Podman detection ───────────────────────────────────────────────────────────
# If Podman is already present, skip Docker entirely and ensure podman-compose is available.
check_podman() {
    if ! cmd_exists podman; then return 1; fi
    success "Podman detected ($(podman --version)) — skipping Docker install."
    # Ensure a compose provider is available
    if podman compose version &>/dev/null 2>&1; then
        success "podman compose available."
    elif cmd_exists podman-compose; then
        success "podman-compose available."
    else
        info "Installing podman-compose…"
        case "$DISTRO_FAMILY" in
            debian) $SUDO apt-get install -y podman-compose 2>/dev/null \
                || pip3 install --user podman-compose ;;
            fedora) $SUDO dnf install -y podman-compose 2>/dev/null \
                || pip3 install --user podman-compose ;;
            arch)   $SUDO pacman -Sy --noconfirm podman-compose 2>/dev/null \
                || pip3 install --user podman-compose ;;
            macos)  brew install podman-compose 2>/dev/null \
                || pip3 install --user podman-compose ;;
        esac
        success "podman-compose installed."
    fi
    warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
    warn "    sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
    return 0
}
# ── Docker ─────────────────────────────────────────────────────────────────────
install_docker_linux_debian() {
    $SUDO apt-get update -q
@@ -139,6 +168,27 @@ check_compose() {
    fi
}
# ── Docker daemon health check ──────────────────────────────────────────────────
check_docker_running() {
    if docker info &>/dev/null 2>&1; then
        success "Docker daemon is running."
        return
    fi
    warn "Docker daemon is not responding."
    if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
        info "Starting Docker service…"
        $SUDO systemctl start docker 2>/dev/null || true
        sleep 2
        if docker info &>/dev/null 2>&1; then
            success "Docker daemon started."
        else
            warn "Docker failed to start. Run: sudo systemctl start docker"
        fi
    elif [[ "$OS" == "Darwin" ]]; then
        warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
    fi
}
# ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
install_nvidia_toolkit() {
    [[ "$OS" != "Linux" ]] && return  # macOS has no NVIDIA support
@@ -146,8 +196,8 @@ install_nvidia_toolkit() {
        info "No NVIDIA GPU detected — skipping Container Toolkit."
        return
    fi
    if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
        success "NVIDIA Container Toolkit already working."
    if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null 2>&1; then
        success "NVIDIA Container Toolkit already configured."
        return
    fi
    info "NVIDIA GPU detected. Installing Container Toolkit…"
@@ -176,6 +226,8 @@ install_nvidia_toolkit() {
}
# ── Environment setup ──────────────────────────────────────────────────────────
# Note: Ollama runs as a Docker container — the compose.yml ollama service
# handles model download automatically on first start (see docker/ollama/entrypoint.sh).
setup_env() {
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
@@ -186,29 +238,88 @@ setup_env() {
    fi
}
# ── Model weights storage ───────────────────────────────────────────────────────
_update_env_key() {
    # Portable in-place key=value update for .env files (Linux + macOS).
    # Appends the key if not already present.
    local file="$1" key="$2" val="$3"
    awk -v k="$key" -v v="$val" '
        BEGIN { found=0 }
        $0 ~ ("^" k "=") { print k "=" v; found=1; next }
        { print }
        END { if (!found) print k "=" v }
    ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
}
configure_model_paths() {
    local env_file
    env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
    # Skip prompts when stdin is not a terminal (e.g. curl | bash)
    if [[ ! -t 0 ]]; then
        info "Non-interactive — using default model paths from .env"
        return
    fi
    echo ""
    info "Model weights storage"
    echo -e "   AI models can be 230+ GB each. If you have a separate data drive,"
    echo -e "   point these at it now. Press Enter to keep the value shown in [brackets]."
    echo ""
    local current input
    current="$(grep -E '^OLLAMA_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
    [[ -z "$current" ]] && current="~/models/ollama"
    read -rp "   Ollama models dir [${current}]: " input || input=""
    input="${input:-$current}"
    input="${input/#\~/$HOME}"
    mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
    _update_env_key "$env_file" "OLLAMA_MODELS_DIR" "$input"
    success "OLLAMA_MODELS_DIR=$input"
    current="$(grep -E '^VLLM_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
    [[ -z "$current" ]] && current="~/models/vllm"
    read -rp "   vLLM models dir [${current}]: " input || input=""
    input="${input:-$current}"
    input="${input/#\~/$HOME}"
    mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
    _update_env_key "$env_file" "VLLM_MODELS_DIR" "$input"
    success "VLLM_MODELS_DIR=$input"
    echo ""
}
# ── Main ───────────────────────────────────────────────────────────────────────
main() {
    echo ""
    echo -e "${BLUE}╔══════════════════════════════════════════╗${NC}"
    echo -e "${BLUE}║   Peregrine — Dependency Installer       ║${NC}"
    echo -e "${BLUE}║   by Circuit Forge LLC                   ║${NC}"
    echo -e "${BLUE}╚══════════════════════════════════════════╝${NC}"
    echo -e "${BLUE}╔══════════════════════════════════════════════════════╗${NC}"
    echo -e "${BLUE}║   Peregrine — Dependency Installer                   ║${NC}"
    echo -e "${BLUE}║   by Circuit Forge LLC                               ║${NC}"
    echo -e "${BLUE}║   \"Don't be evil, for real and forever.\"             ║${NC}"
    echo -e "${BLUE}╚══════════════════════════════════════════════════════╝${NC}"
    echo ""
    install_git
    install_docker
    check_compose
    install_nvidia_toolkit
    # Podman takes precedence if already installed; otherwise install Docker
    if ! check_podman; then
        install_docker
        check_docker_running
        check_compose
        install_nvidia_toolkit
    fi
    setup_env
    configure_model_paths
    echo ""
    success "All dependencies installed."
    echo ""
    echo -e "  ${GREEN}Next steps:${NC}"
    echo -e "  1. Edit ${YELLOW}.env${NC} to set your preferred ports and model paths"
    echo -e "  2. Start Peregrine:"
    echo -e "     ${YELLOW}docker compose --profile remote up -d${NC}"
    echo -e "  3. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
    echo -e "  1. Start Peregrine:"
    echo -e "     ${YELLOW}make start${NC}               # remote/API-only (no local GPU)"
    echo -e "     ${YELLOW}make start PROFILE=cpu${NC}   # local Ollama inference (CPU)"
    echo -e "  2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
    echo -e "     (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
    echo ""
if groups "$USER" 2>/dev/null | grep -q docker; then
true