20 changed files with 111 additions and 4127 deletions
--- a/.gitignore
+++ b/.gitignore
@ -22,8 +22,3 @@ config/user.yaml
 config/.backup-*
 config/integrations/*.yaml
 !config/integrations/*.yaml.example
 # companyScraper runtime artifacts
 scrapers/.cache/
 scrapers/.debug/
 scrapers/raw_scrapes/
--- a/Dockerfile.finetune
+++ b/Dockerfile.finetune
@ -1,38 +0,0 @@
 # Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
 # Large image (~12-15 GB after build). Built once, cached on rebuilds.
 # GPU strongly recommended. CPU fallback works but training is very slow.
 #
 # Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
 # If your GPU requires a different CUDA version, change the FROM line and
 # reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121).
 FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
 WORKDIR /app
 # Build tools needed by bitsandbytes CUDA kernels and unsloth
 RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ git libgomp1 \
    && rm -rf /var/lib/apt/lists/*
 # Install training stack.
 # unsloth detects CUDA version automatically from the base image.
 RUN pip install --no-cache-dir \
    "unsloth @ git+https://github.com/unslothai/unsloth.git" \
    "datasets>=2.18" "trl>=0.8" peft transformers \
    "bitsandbytes>=0.43.0" accelerate sentencepiece \
    requests pyyaml
 COPY scripts/ /app/scripts/
 COPY config/  /app/config/
 ENV PYTHONUNBUFFERED=1
 # Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
 ENV CUDA_VISIBLE_DEVICES=0
 # Runtime env vars injected by compose.yml:
 #   OLLAMA_URL              — Ollama API base (default: http://ollama:11434)
 #   OLLAMA_MODELS_MOUNT     — finetune container's mount path for ollama models volume
 #   OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
 #   DOCS_DIR                — cover letters + training data root (default: /docs)
 ENTRYPOINT ["python", "scripts/finetune_local.py"]
--- a/46
+++ b/46
@ -1,66 +1,36 @@
 # Makefile — Peregrine convenience targets
 # Usage: make <target>
-.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
+.PHONY: setup preflight start stop restart logs test clean help
 PROFILE ?= remote
 PYTHON  ?= python3
-# Auto-detect container engine: prefer docker compose, fall back to podman
+setup:          ## Install dependencies (Docker, NVIDIA toolkit)
 COMPOSE ?= $(shell \
  command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
  && echo "docker compose" \
  || (command -v podman >/dev/null 2>&1 \
      && podman compose version >/dev/null 2>&1 \
      && echo "podman compose" \
      || echo "podman-compose"))
 # GPU profiles require an overlay for NVIDIA device reservations.
 # Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
 # Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
 COMPOSE_FILES := -f compose.yml
 ifneq (,$(findstring podman,$(COMPOSE)))
  ifneq (,$(findstring gpu,$(PROFILE)))
    COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
  endif
 else
  ifneq (,$(findstring gpu,$(PROFILE)))
    COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
  endif
 endif
 setup:          ## Install dependencies (Docker or Podman + NVIDIA toolkit)
 	@bash setup.sh
 preflight:      ## Check ports + system resources; write .env
 	@$(PYTHON) scripts/preflight.py
 start: preflight  ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
-	$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
+	docker compose --profile $(PROFILE) up -d
 stop:           ## Stop all Peregrine services
-	$(COMPOSE) down
+	docker compose down
 restart: preflight  ## Preflight check then restart all services
-	$(COMPOSE) down && $(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
+	docker compose down && docker compose --profile $(PROFILE) up -d
 logs:           ## Tail app logs
-	$(COMPOSE) logs -f app
+	docker compose logs -f app
 test:           ## Run the test suite
-	@$(PYTHON) -m pytest tests/ -v
+	$(PYTHON) -m pytest tests/ -v
 prepare-training: ## Scan docs_dir for cover letters and build training JSONL
 	$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
 finetune:       ## Fine-tune your personal cover letter model (run prepare-training first)
 	@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
 	$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
 clean:          ## Remove containers, images, and data volumes (DESTRUCTIVE)
 	@echo "WARNING: This will delete all Peregrine containers and data."
 	@read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ]
-	$(COMPOSE) down --rmi local --volumes
+	docker compose down --rmi local --volumes
 help:           ## Show this help
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
--- a/README.md
+++ b/README.md
@ -2,8 +2,6 @@
 **AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
 > *"Don't be evil, for real and forever."*
 Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
 Privacy-first, local-first. Your data never leaves your machine.
--- a/app/pages/0_Setup.py
+++ b/app/pages/0_Setup.py
@ -403,9 +403,9 @@ elif step == 5:
        st.caption("Change only if services run on non-default ports or remote hosts.")
        svc = dict(saved_yaml.get("services", {}))
        for svc_name, default_host, default_port in [
-            ("ollama",  "ollama",   11434),  # Docker service name
+            ("ollama",  "localhost", 11434),
-            ("vllm",    "vllm",     8000),   # Docker service name
+            ("vllm",    "localhost", 8000),
-            ("searxng", "searxng",  8080),   # Docker internal port (host-mapped: 8888)
+            ("searxng", "localhost", 8888),
        ]:
            c1, c2 = st.columns([3, 1])
            svc[f"{svc_name}_host"] = c1.text_input(
--- a/app/pages/2_Settings.py
+++ b/app/pages/2_Settings.py
@ -1026,10 +1026,9 @@ with tab_finetune:
        if ft_step == 1:
            st.markdown("**Step 1: Upload Cover Letters**")
            st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
            uploaded = st.file_uploader(
-                "Upload cover letters (.md or .txt)",
+                "Upload cover letters (PDF, DOCX, or TXT)",
-                type=["md", "txt"],
+                type=["pdf", "docx", "txt"],
                accept_multiple_files=True,
            )
            if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
@ -1041,45 +1040,18 @@ with tab_finetune:
                st.rerun()
        elif ft_step == 2:
-            st.markdown("**Step 2: Extract Training Pairs**")
+            st.markdown("**Step 2: Preview Training Pairs**")
-            import json as _json
+            st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
            import sqlite3 as _sqlite3
            from scripts.db import DEFAULT_DB as _FT_DB
            jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
            # Show task status
            _ft_conn = _sqlite3.connect(_FT_DB)
            _ft_conn.row_factory = _sqlite3.Row
            _ft_task = _ft_conn.execute(
                "SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
            ).fetchone()
            _ft_conn.close()
            if _ft_task:
                _ft_status = _ft_task["status"]
                if _ft_status == "completed":
                    st.success(f"✅ {_ft_task['error'] or 'Extraction complete'}")
                elif _ft_status in ("running", "queued"):
                    st.info(f"⏳ {_ft_status.capitalize()}… refresh to check progress.")
                elif _ft_status == "failed":
                    st.error(f"Extraction failed: {_ft_task['error']}")
            if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
                from scripts.task_runner import submit_task as _ft_submit
                _ft_submit(_FT_DB, "prepare_training", 0)
                st.info("Extracting in the background — refresh in a moment.")
                st.rerun()
            if jsonl_path.exists():
                import json as _json
                pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
-                st.caption(f"{len(pairs)} training pairs ready.")
+                st.caption(f"{len(pairs)} training pairs extracted.")
                for i, p in enumerate(pairs[:3]):
                    with st.expander(f"Pair {i+1}"):
-                        st.text(p.get("output", p.get("input", ""))[:300])
+                        st.text(p.get("input", "")[:300])
            else:
-                st.caption("No training pairs yet — click Extract above.")
+                st.warning("No training pairs found. Run `prepare_training_data.py` first.")
            col_back, col_next = st.columns([1, 4])
            if col_back.button("← Back", key="ft_back2"):
                st.session_state.ft_step = 1
@ -1089,45 +1061,13 @@ with tab_finetune:
                st.rerun()
        elif ft_step == 3:
-            st.markdown("**Step 3: Fine-Tune**")
+            st.markdown("**Step 3: Train**")
-
+            st.slider("Epochs", 3, 20, 10, key="ft_epochs")
-            _ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower()
+            if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
-                                if _profile else "cover")
+                st.info("Fine-tune queued as a background task. Check back in 30–60 minutes.")
-            _ft_model_name = f"{_ft_profile_name}-cover-writer"
+            if st.button("← Back", key="ft_back3"):
            st.info(
                "Run the command below from your terminal. Training takes 30–90 min on GPU "
                "and registers the model automatically when complete."
            )
            st.code("make finetune PROFILE=single-gpu", language="bash")
            st.caption(
                f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
                "Cover letter generation will use it automatically."
            )
            st.markdown("**Model status:**")
            try:
                import os as _os
                import requests as _ft_req
                _ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
                _tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
                if _tags.status_code == 200:
                    _model_names = [m["name"] for m in _tags.json().get("models", [])]
                    if any(_ft_model_name in m for m in _model_names):
                        st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
                    else:
                        st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
                else:
                    st.caption("Ollama returned an unexpected response.")
            except Exception:
                st.caption("Could not reach Ollama — ensure services are running with `make start`.")
            col_back, col_refresh = st.columns([1, 3])
            if col_back.button("← Back", key="ft_back3"):
                st.session_state.ft_step = 2
                st.rerun()
            if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
                st.rerun()
 # ── Developer tab ─────────────────────────────────────────────────────────────
 if _show_dev_tab:
--- a/compose.gpu.yml
+++ b/compose.gpu.yml
@ -1,46 +0,0 @@
 # compose.gpu.yml — Docker NVIDIA GPU overlay
 #
 # Adds NVIDIA GPU reservations to Peregrine services.
 # Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
 # Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
 #
 # Prerequisites:
 #   sudo nvidia-ctk runtime configure --runtime=docker
 #   sudo systemctl restart docker
 #
 services:
  ollama:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
  vision:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
  vllm:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
  finetune:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
--- a/compose.podman-gpu.yml
+++ b/compose.podman-gpu.yml
@ -1,43 +0,0 @@
 # compose.podman-gpu.yml — Podman GPU override
 #
 # Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
 # for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
 # when podman/podman-compose is detected, or manually:
 #   podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
 #
 # Prerequisites:
 #   sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
 #   (requires nvidia-container-toolkit >= 1.14)
 #
 services:
  ollama:
    devices:
      - nvidia.com/gpu=0
    deploy:
      resources:
        reservations:
          devices: []
  vision:
    devices:
      - nvidia.com/gpu=0
    deploy:
      resources:
        reservations:
          devices: []
  vllm:
    devices:
      - nvidia.com/gpu=1
    deploy:
      resources:
        reservations:
          devices: []
  finetune:
    devices:
      - nvidia.com/gpu=0
    deploy:
      resources:
        reservations:
          devices: []
--- a/compose.yml
+++ b/compose.yml
@ -12,7 +12,6 @@ services:
      - ${DOCS_DIR:-~/Documents/JobSearch}:/docs
    environment:
      - STAGING_DB=/app/data/staging.db
      - DOCS_DIR=/docs
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
      - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
@ -48,6 +47,18 @@ services:
    profiles: [cpu, single-gpu, dual-gpu]
    restart: unless-stopped
  ollama-gpu:
    extends:
      service: ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
    profiles: [single-gpu, dual-gpu]
  vision:
    build:
      context: .
@ -57,6 +68,13 @@ services:
    environment:
      - VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
      - VISION_REVISION=${VISION_REVISION:-2025-01-09}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
    profiles: [single-gpu, dual-gpu]
    restart: unless-stopped
@ -74,24 +92,12 @@ services:
      --enforce-eager
      --max-num-seqs 8
      --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    profiles: [dual-gpu]
    restart: unless-stopped
  finetune:
    build:
      context: .
      dockerfile: Dockerfile.finetune
    volumes:
      - ${DOCS_DIR:-~/Documents/JobSearch}:/docs
      - ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
      - ./config:/app/config
    environment:
      - DOCS_DIR=/docs
      - OLLAMA_URL=http://ollama:11434
      - OLLAMA_MODELS_MOUNT=/ollama-models
      - OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
    depends_on:
      ollama:
        condition: service_started
    profiles: [cpu, single-gpu, dual-gpu]
    restart: "no"
--- a/config/llm.yaml
+++ b/config/llm.yaml
@ -21,26 +21,26 @@ backends:
    type: openai_compat
  ollama:
    api_key: ollama
-    base_url: http://ollama:11434/v1
+    base_url: http://localhost:11434/v1
    enabled: true
-    model: llama3.2:3b
+    model: meghan-cover-writer:latest
    supports_images: false
    type: openai_compat
  ollama_research:
    api_key: ollama
-    base_url: http://ollama:11434/v1
+    base_url: http://localhost:11434/v1
    enabled: true
-    model: llama3.2:3b
+    model: llama3.1:8b
    supports_images: false
    type: openai_compat
  vision_service:
-    base_url: http://vision:8002
+    base_url: http://localhost:8002
    enabled: true
    supports_images: true
    type: vision_service
  vllm:
    api_key: ''
-    base_url: http://vllm:8000/v1
+    base_url: http://localhost:8000/v1
    enabled: true
    model: __auto__
    supports_images: false
--- a/config/llm.yaml.example
+++ b/config/llm.yaml.example
@ -21,21 +21,21 @@ backends:
    supports_images: false
  ollama:
    api_key: ollama
-    base_url: http://ollama:11434/v1    # Docker service name; use localhost:11434 outside Docker
+    base_url: http://localhost:11434/v1
    enabled: true
-    model: llama3.2:3b
+    model: meghan-cover-writer:latest
    type: openai_compat
    supports_images: false
  ollama_research:
    api_key: ollama
-    base_url: http://ollama:11434/v1    # Docker service name; use localhost:11434 outside Docker
+    base_url: http://localhost:11434/v1
    enabled: true
-    model: llama3.2:3b
+    model: llama3.1:8b
    type: openai_compat
    supports_images: false
  vllm:
    api_key: ''
-    base_url: http://vllm:8000/v1      # Docker service name; use localhost:8000 outside Docker
+    base_url: http://localhost:8000/v1
    enabled: true
    model: __auto__
    type: openai_compat
--- a/config/user.yaml.example
+++ b/config/user.yaml.example
@ -44,15 +44,15 @@ inference_profile: "remote"  # remote | cpu | single-gpu | dual-gpu
 services:
  streamlit_port: 8501
-  ollama_host: ollama        # Docker service name; use "localhost" if running outside Docker
+  ollama_host: localhost
  ollama_port: 11434
  ollama_ssl: false
  ollama_ssl_verify: true
-  vllm_host: vllm            # Docker service name; use "localhost" if running outside Docker
+  vllm_host: localhost
  vllm_port: 8000
  vllm_ssl: false
  vllm_ssl_verify: true
-  searxng_host: searxng      # Docker service name; use "localhost" if running outside Docker
+  searxng_host: localhost
-  searxng_port: 8080         # internal Docker port; use 8888 for host-mapped access
+  searxng_port: 8888
  searxng_ssl: false
  searxng_ssl_verify: true
--- a/docs/backlog.md
+++ b/docs/backlog.md
@ -43,7 +43,7 @@ Unscheduled ideas and deferred features. Roughly grouped by area.
 ## Container Runtime
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
+- **Podman support** — Update `Makefile` to auto-detect `docker compose` vs `podman-compose` (e.g. `COMPOSE ?= $(shell command -v docker >/dev/null 2>&1 && echo "docker compose" || echo "podman-compose")`; stdout of `command -v` must be discarded or the binary path ends up in `COMPOSE`). Note in README that rootless Podman requires CDI GPU device spec (`nvidia.com/gpu=all`) instead of `runtime: nvidia` in `compose.yml`.
 - **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
 ---
--- a/docs/plans/2026-02-25-circuitforge-license-design.md
+++ b/docs/plans/2026-02-25-circuitforge-license-design.md
@ -1,367 +0,0 @@
 # CircuitForge License Server — Design Document
 **Date:** 2026-02-25
 **Status:** Approved — ready for implementation
 ---
 ## Goal
 Build a self-hosted licensing server for Circuit Forge LLC products. v1 serves Peregrine; schema is multi-product from day one. Enforces free / paid / premium / ultra tier gates with offline-capable JWT validation, 30-day refresh cycle, 7-day grace period, seat tracking, usage telemetry, and a content violation flagging foundation.
 ## Architecture
 ```
 ┌─────────────────────────────────────────────────┐
 │  circuitforge-license  (Heimdall:8600)           │
 │  FastAPI + SQLite + RS256 JWT                    │
 │                                                  │
 │  Public API (/v1/…):                             │
 │    POST /v1/activate     → issue JWT             │
 │    POST /v1/refresh      → renew JWT             │
 │    POST /v1/deactivate   → free a seat           │
 │    POST /v1/usage        → record usage event    │
 │    POST /v1/flag         → report violation      │
 │                                                  │
 │  Admin API (/admin/…, bearer token):             │
 │    POST/GET /admin/keys          → CRUD keys     │
 │    DELETE   /admin/keys/{id}     → revoke        │
 │    GET      /admin/activations   → audit         │
 │    GET      /admin/usage         → telemetry     │
 │    GET/PATCH /admin/flags        → flag review   │
 └─────────────────────────────────────────────────┘
         ↑ HTTPS via Caddy (license.circuitforge.com)
 ┌─────────────────────────────────────────────────┐
 │  Peregrine (user's machine)                      │
 │  scripts/license.py                              │
 │                                                  │
 │  activate(key)    → POST /v1/activate            │
 │                     writes config/license.json   │
 │  verify_local()   → validates JWT offline        │
 │                     using embedded public key    │
 │  refresh_if_needed() → called on app startup     │
 │  effective_tier() → tier string for can_use()    │
 │  report_usage(…)  → fire-and-forget telemetry    │
 │  report_flag(…)   → fire-and-forget violation    │
 └─────────────────────────────────────────────────┘
 ```
 **Key properties:**
 - Peregrine verifies tier **offline** on every check — RS256 public key embedded at build time
 - Network required only at activation and 30-day refresh
 - Revoked keys stop working once the current JWT expires and its grace period ends (up to 30 days plus the 7-day grace period after revocation — acceptable for v1)
 - `config/license.json` gitignored; missing = free tier
 ---
 ## Crypto: RS256 (asymmetric JWT)
 - **Private key** — lives only on the license server (`keys/private.pem`, gitignored)
 - **Public key** — committed to both the license server repo and Peregrine (`scripts/license_public_key.pem`)
 - Peregrine can verify JWT authenticity without ever knowing the private key
 - A JWT cannot be forged or altered without the private key
 - Revocation: the server refuses to refresh a revoked key; the existing JWT stays valid until it expires and the grace period runs out
 **Key generation (one-time, on Heimdall):**
 ```bash
 openssl genrsa -out keys/private.pem 2048
 openssl rsa -in keys/private.pem -pubout -out keys/public.pem
 # copy keys/public.pem → peregrine/scripts/license_public_key.pem
 ```
 ---
 ## Database Schema
 ```sql
 CREATE TABLE license_keys (
    id             TEXT PRIMARY KEY,            -- UUID
    key_display    TEXT UNIQUE NOT NULL,        -- CFG-PRNG-XXXX-XXXX-XXXX
    product        TEXT NOT NULL,               -- peregrine | falcon | osprey | …
    tier           TEXT NOT NULL,               -- paid | premium | ultra
    seats          INTEGER DEFAULT 1,
    valid_until    TEXT,                        -- ISO date or NULL (perpetual)
    revoked        INTEGER DEFAULT 0,
    customer_email TEXT,                        -- proper field, not buried in notes
    source         TEXT DEFAULT 'manual',       -- manual | beta | promo | stripe
    trial          INTEGER DEFAULT 0,           -- 1 = time-limited trial key
    notes          TEXT,
    created_at     TEXT NOT NULL
 );
 CREATE TABLE activations (
    id             TEXT PRIMARY KEY,
    key_id         TEXT NOT NULL REFERENCES license_keys(id),
    machine_id     TEXT NOT NULL,               -- sha256(hostname + MAC)
    app_version    TEXT,                        -- Peregrine version at last refresh
    platform       TEXT,                        -- linux | macos | windows | docker
    activated_at   TEXT NOT NULL,
    last_refresh   TEXT NOT NULL,
    deactivated_at TEXT                         -- NULL = still active
 );
 CREATE TABLE usage_events (
    id          TEXT PRIMARY KEY,
    key_id      TEXT NOT NULL REFERENCES license_keys(id),
    machine_id  TEXT NOT NULL,
    product     TEXT NOT NULL,
    event_type  TEXT NOT NULL,                  -- cover_letter_generated |
                                                --   company_research | email_sync |
                                                --   interview_prep | survey | etc.
    metadata    TEXT,                           -- JSON blob for context
    created_at  TEXT NOT NULL
 );
 CREATE TABLE flags (
    id           TEXT PRIMARY KEY,
    key_id       TEXT NOT NULL REFERENCES license_keys(id),
    machine_id   TEXT,
    product      TEXT NOT NULL,
    flag_type    TEXT NOT NULL,                 -- content_violation | tos_violation |
                                                --   abuse | manual
    details      TEXT,                          -- JSON: prompt snippet, output excerpt
    status       TEXT DEFAULT 'open',           -- open | reviewed | dismissed | actioned
    created_at   TEXT NOT NULL,
    reviewed_at  TEXT,
    action_taken TEXT                           -- none | warned | revoked
 );
 CREATE TABLE audit_log (
    id          TEXT PRIMARY KEY,
    entity_type TEXT NOT NULL,                  -- key | activation | flag
    entity_id   TEXT NOT NULL,
    action      TEXT NOT NULL,                  -- created | revoked | activated |
                                                --   deactivated | flag_actioned
    actor       TEXT,                           -- admin identifier (future multi-admin)
    details     TEXT,                           -- JSON
    created_at  TEXT NOT NULL
 );
 ```
 **Flags scope (v1):** Schema and `POST /v1/flag` endpoint capture data. No admin enforcement UI in v1 — query DB directly. Build review UI in v2 when there's data to act on.
 ---
 ## JWT Payload
 ```json
 {
  "sub":      "CFG-PRNG-A1B2-C3D4-E5F6",
  "product":  "peregrine",
  "tier":     "paid",
  "seats":    2,
  "machine":  "a3f9c2…",
  "notice":   "Version 1.1 available — see circuitforge.com/update",
  "iat":      1740000000,
  "exp":      1742592000
 }
 ```
 `notice` is optional — set via a server config value; included in refresh responses so Peregrine can surface it as a banner. No DB table needed.
 ---
 ## Key Format
 `CFG-PRNG-A1B2-C3D4-E5F6`
 - `CFG` — Circuit Forge
 - `PRNG` / `FLCN` / `OSPY` / … — 4-char product code
 - Three random 4-char alphanumeric segments
 - Human-readable, easy to copy/paste into a support email
 ---
 ## Endpoint Reference
 | Method | Path | Auth | Purpose |
 |--------|------|------|---------|
 | POST | `/v1/activate` | none | Issue JWT for key + machine |
 | POST | `/v1/refresh` | JWT bearer | Renew JWT before expiry |
 | POST | `/v1/deactivate` | JWT bearer | Free a seat |
 | POST | `/v1/usage` | JWT bearer | Record usage event (fire-and-forget) |
 | POST | `/v1/flag` | JWT bearer | Report content/ToS violation |
 | POST | `/admin/keys` | admin token | Create a new key |
 | GET | `/admin/keys` | admin token | List all keys + activation counts |
 | DELETE | `/admin/keys/{id}` | admin token | Revoke a key |
 | GET | `/admin/activations` | admin token | Full activation audit |
 | GET | `/admin/usage` | admin token | Usage breakdown per key/product/event |
 | GET | `/admin/flags` | admin token | List flags (open by default) |
 | PATCH | `/admin/flags/{id}` | admin token | Update flag status + action |
 ---
 ## Peregrine Client (`scripts/license.py`)
 **Public API:**
 ```python
 def activate(key: str) -> dict             # POST /v1/activate, writes license.json
 def verify_local() -> dict | None          # validates JWT offline; None = free tier
 def refresh_if_needed() -> None            # silent; called on app startup
 def effective_tier() -> str                # "free"|"paid"|"premium"|"ultra"
 def report_usage(event_type: str,          # fire-and-forget; failures silently dropped
                 metadata: dict = {}) -> None
 def report_flag(flag_type: str,            # fire-and-forget
                details: dict) -> None
 ```
 **`effective_tier()` decision tree:**
 ```
 license.json missing or unreadable     → "free"
 JWT signature invalid                  → "free"
 JWT product != "peregrine"             → "free"
 JWT not expired                        → tier from payload
 JWT expired, within grace period       → tier from payload + show banner
 JWT expired, grace period expired      → "free" + show banner
 ```
 **`config/license.json` (gitignored):**
 ```json
 {
  "jwt":          "eyJ…",
  "key_display":  "CFG-PRNG-A1B2-C3D4-E5F6",
  "tier":         "paid",
  "valid_until":  "2026-03-27",
  "machine_id":   "a3f9c2…",
  "last_refresh": "2026-02-25T12:00:00Z",
  "grace_until":  null
 }
 ```
 **Integration point in `tiers.py`:**
 ```python
 def effective_tier(profile) -> str:
    from scripts.license import effective_tier as _license_tier
    if profile.dev_tier_override:      # dev override still works in dev mode
        return profile.dev_tier_override
    return _license_tier()
 ```
 **Settings License tab** (new tab in `app/pages/2_Settings.py`):
 - Text input: enter license key → calls `activate()` → shows result
 - If active: tier badge, key display string, expiry date, seat count
 - Grace period: amber banner with days remaining
 - "Deactivate this machine" button → `/v1/deactivate`, deletes `license.json`
 ---
 ## Deployment
 **Repo:** `git.opensourcesolarpunk.com/pyr0ball/circuitforge-license` (private)
 **Repo layout:**
 ```
 circuitforge-license/
 ├── app/
 │   ├── main.py          # FastAPI app
 │   ├── db.py            # SQLite helpers, schema init
 │   ├── models.py        # Pydantic models
 │   ├── crypto.py        # RSA sign/verify helpers
 │   └── routes/
 │       ├── public.py    # /v1/* endpoints
 │       └── admin.py     # /admin/* endpoints
 ├── data/                # SQLite DB (named volume)
 ├── keys/
 │   ├── private.pem      # gitignored
 │   └── public.pem       # committed
 ├── scripts/
 │   └── issue-key.sh     # curl wrapper for key issuance
 ├── tests/
 ├── Dockerfile
 ├── docker-compose.yml
 ├── .env.example
 └── requirements.txt
 ```
 **`docker-compose.yml` (on Heimdall):**
 ```yaml
 services:
  license:
    build: .
    restart: unless-stopped
    ports:
      - "127.0.0.1:8600:8600"
    volumes:
      - license_data:/app/data
      - ./keys:/app/keys:ro
    env_file: .env
 volumes:
  license_data:
 ```
 **`.env` (gitignored):**
 ```
 ADMIN_TOKEN=<long random string>
 JWT_PRIVATE_KEY_PATH=/app/keys/private.pem
 JWT_PUBLIC_KEY_PATH=/app/keys/public.pem
 JWT_EXPIRY_DAYS=30
 GRACE_PERIOD_DAYS=7
 ```
 **Caddy block (add to Heimdall Caddyfile):**
 ```caddy
 license.circuitforge.com {
    reverse_proxy localhost:8600
 }
 ```
 ---
 ## Admin Workflow (v1)
 All operations via `curl` or `scripts/issue-key.sh`:
 ```bash
 # Issue a key
 ./scripts/issue-key.sh --product peregrine --tier paid --seats 2 \
  --email user@example.com --notes "Beta — manual payment 2026-02-25"
 # → CFG-PRNG-A1B2-C3D4-E5F6  (email to customer)
 # List all keys
 curl https://license.circuitforge.com/admin/keys \
  -H "Authorization: Bearer $ADMIN_TOKEN"
 # Revoke a key
 curl -X DELETE https://license.circuitforge.com/admin/keys/{id} \
  -H "Authorization: Bearer $ADMIN_TOKEN"
 ```
 ---
 ## Testing Strategy
 **License server:**
 - pytest with in-memory SQLite and generated test keypair
 - All endpoints tested: activate, refresh, deactivate, usage, flag, admin CRUD
 - Seat limit enforcement, expiry, revocation all unit tested
 **Peregrine client:**
 - `verify_local()` tested with pre-signed test JWT using test keypair
 - `activate()` / `refresh_if_needed()` tested with `httpx` mocks
 - `effective_tier()` tested across all states: valid, expired, grace, revoked, missing
 **Integration smoke test:**
 ```bash
 docker compose up -d
 # create test key via admin API
 # call /v1/activate with test key
 # verify JWT signature with public key
 # verify /v1/refresh extends expiry
 ```
 ---
 ## Decisions Log
 | Decision | Rationale |
 |----------|-----------|
 | RS256 over HS256 | Public key embeddable in client; private key never leaves server |
 | SQLite over Postgres | Matches Peregrine's SQLite-first philosophy; trivially backupable |
 | 30-day JWT lifetime | Standard SaaS pattern; invisible to users in normal operation |
 | 7-day grace period | Covers travel, network outages, server maintenance |
 | Flags v1: capture only | No volume to justify review UI yet; add in v2 |
 | No payment integration | Manual issuance until customer volume justifies automation |
 | Multi-product schema | Adding a `product` column now is cheaper than migrating a live DB later |
 | Separate repo | License server is infrastructure, not part of Peregrine's BSL scope |
--- a/docs/plans/2026-02-25-circuitforge-license-plan.md
+++ b/docs/plans/2026-02-25-circuitforge-license-plan.md
--- a/scrapers/companyScraper.py
+++ b/scrapers/companyScraper.py
--- a/scripts/finetune_local.py
+++ b/scripts/finetune_local.py
@ -32,12 +32,7 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 # ── Config ────────────────────────────────────────────────────────────────────
 DEFAULT_MODEL   = "unsloth/Llama-3.2-3B-Instruct"   # safe on 8 GB VRAM
-# DOCS_DIR env var overrides user_profile when running inside Docker
+_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 _docs_env = os.environ.get("DOCS_DIR", "")
 _docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 )
 LETTERS_JSONL   = _docs / "training_data" / "cover_letters.jsonl"
 OUTPUT_DIR      = _docs / "training_data" / "finetune_output"
 GGUF_DIR        = _docs / "training_data" / "gguf"
@ -71,7 +66,7 @@ print(f"{'='*60}\n")
 # ── Load dataset ──────────────────────────────────────────────────────────────
 if not LETTERS_JSONL.exists():
    sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
-             "Run: make prepare-training  (or: python scripts/prepare_training_data.py)")
+             "Run: conda run -n job-seeker python scripts/prepare_training_data.py")
 records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
 print(f"Loaded {len(records)} training examples.")
@ -227,102 +222,35 @@ if not args.no_gguf and USE_UNSLOTH:
 else:
    gguf_path = None
-# ── Register with Ollama (auto) ────────────────────────────────────────────────
+# ── Print next steps ──────────────────────────────────────────────────────────
 def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy GGUF into the shared Ollama models volume and register via the API.
    Works in two modes:
      Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
                      translate the container path into Ollama's view of the file.
      Local         — gguf_path is an absolute path Ollama can read directly.
    """
    import shutil
    import requests
    ollama_url        = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    models_mount      = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")
    # ── Place GGUF where Ollama can read it ───────────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        if dest != gguf_path:
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())
    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        f"PARAMETER temperature 0.7\n"
        f"PARAMETER top_p 0.9\n"
        f"PARAMETER num_ctx 32768\n"
    )
    # Write Modelfile to disk as a reference (useful for debugging)
    (OUTPUT_DIR / "Modelfile").write_text(modelfile_text)
    # ── Create via Ollama API ─────────────────────────────────────────────────
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url} …")
    try:
        r = requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        )
        for line in r.iter_lines():
            if line:
                import json as _json
                try:
                    msg = _json.loads(line).get("status", "")
                except Exception:
                    msg = line.decode()
                if msg:
                    print(f"  {msg}")
        if r.status_code != 200:
            print(f"  WARNING: Ollama returned HTTP {r.status_code}")
            return False
    except Exception as exc:
        print(f"  Ollama registration failed: {exc}")
        print(f"  Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
        return False
    # ── Update config/llm.yaml ────────────────────────────────────────────────
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                llm_yaml.write_text(
                    _yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
                )
                print(f"  llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            print(f"  Could not update llm.yaml automatically: {exc}")
 print(f"\n{'='*60}")
-    print(f"  Model ready: {model_name}:latest")
+print("  DONE — next steps to load into Ollama:")
-    print(f"  Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
+print(f"{'='*60}")
    print(f"{'='*60}\n")
    return True
 if gguf_path and gguf_path.exists():
-    _auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
+    modelfile = OUTPUT_DIR / "Modelfile"
    modelfile.write_text(f"""FROM {gguf_path}
 SYSTEM \"\"\"
 {SYSTEM_PROMPT}
 \"\"\"
 PARAMETER temperature 0.7
 PARAMETER top_p 0.9
 PARAMETER num_ctx 32768
 """)
    print(f"\n1. Modelfile written to: {modelfile}")
    print(f"\n2. Create the Ollama model:")
    print(f"     ollama create {OLLAMA_NAME} -f {modelfile}")
    print(f"\n3. Test it:")
    print(f"     ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
    print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
    print(f"   then pick it in Settings → LLM Backends → Ollama → Model.")
 else:
-    print(f"\n{'='*60}")
+    print(f"\n  Adapter only (no GGUF). To convert manually:")
-    print("  Adapter saved (no GGUF produced).")
+    print(f"  1. Merge adapter:")
-    print(f"  Re-run without --no-gguf to generate a GGUF for Ollama registration.")
+    print(f"       conda run -n ogma python -c \"")
-    print(f"  Adapter path: {adapter_path}")
+    print(f"         from peft import AutoPeftModelForCausalLM")
-    print(f"{'='*60}\n")
+    print(f"         m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
    print(f"         m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
    print(f"  2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
    print(f"  3. ollama create {OLLAMA_NAME} -f Modelfile")
 print()
--- a/scripts/prepare_training_data.py
+++ b/scripts/prepare_training_data.py
@ -12,7 +12,6 @@ Usage:
 """
 import argparse
 import json
 import os
 import re
 import sys
 from pathlib import Path
@ -23,10 +22,7 @@ from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
-_docs_env = os.environ.get("DOCS_DIR", "")
+_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 _docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 )
 LETTERS_DIR = _docs
 # Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
 LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
@ -81,16 +77,6 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
            if p not in seen:
                seen.add(p)
                all_paths.append(p)
    # Also scan web-uploaded files (Settings → Fine-tune → Upload)
    uploads_dir = letters_dir / "training_data" / "uploads"
    if uploads_dir.exists():
        for glob in ("*.md", "*.txt"):
            for p in uploads_dir.glob(glob):
                if p not in seen:
                    seen.add(p)
                    all_paths.append(p)
    for path in sorted(all_paths):
        text = path.read_text(encoding="utf-8", errors="ignore").strip()
        if not text or len(text) < 100:
--- a/scripts/task_runner.py
+++ b/scripts/task_runner.py
@ -243,17 +243,6 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
            )
            return
        elif task_type == "prepare_training":
            from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
            records = build_records()
            write_jsonl(records, DEFAULT_OUTPUT)
            n = len(records)
            update_task_status(
                db_path, task_id, "completed",
                error=f"{n} training pair{'s' if n != 1 else ''} extracted",
            )
            return
        else:
            raise ValueError(f"Unknown task_type: {task_type!r}")
--- a/setup.sh
+++ b/setup.sh
@ -64,35 +64,6 @@ install_git() {
    success "git installed."
 }
 # ── Podman detection ───────────────────────────────────────────────────────────
 # If Podman is already present, skip Docker entirely and ensure podman-compose is available.
 #
 # check_podman — detect an existing Podman install and make sure a compose
 # provider is available for it.
 #
 # Reads:    DISTRO_FAMILY (selects the package manager), SUDO (privilege prefix).
 # Uses:     cmd_exists, success, info, warn — helpers defined outside this hunk.
 # Returns:  1 when the `podman` command is not found (the caller then falls back
 #           to the Docker path); 0 in every other case.
 #
 # NOTE(review): the `case` below has no default branch, and the final
 # "podman-compose installed." message is printed unconditionally after it, so
 # an unrecognised DISTRO_FAMILY reports success without installing anything.
 check_podman() {
    # Bail out early so the caller can fall back to installing Docker.
    if ! cmd_exists podman; then return 1; fi
    success "Podman detected ($(podman --version)) — skipping Docker install."
    # Ensure a compose provider is available
    # Preference order: built-in `podman compose`, then the standalone
    # `podman-compose` command, then an install attempt.
    # (`&>` already redirects both stdout and stderr; the trailing `2>&1` is redundant.)
    if podman compose version &>/dev/null 2>&1; then
        success "podman compose available."
    elif cmd_exists podman-compose; then
        success "podman-compose available."
    else
        info "Installing podman-compose…"
        # Each branch tries the distro package first (stderr suppressed) and
        # falls back to a per-user pip3 install if that command fails.
        case "$DISTRO_FAMILY" in
            debian)  $SUDO apt-get install -y podman-compose 2>/dev/null \
                     || pip3 install --user podman-compose ;;
            fedora)  $SUDO dnf install -y podman-compose 2>/dev/null \
                     || pip3 install --user podman-compose ;;
            arch)    $SUDO pacman -Sy --noconfirm podman-compose 2>/dev/null \
                     || pip3 install --user podman-compose ;;
            macos)   brew install podman-compose 2>/dev/null \
                     || pip3 install --user podman-compose ;;
        esac
        success "podman-compose installed."
    fi
    # GPU passthrough is not configured here; the user is only told which
    # command to run.
    warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
    warn "  sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
    return 0
 }
 # ── Docker ─────────────────────────────────────────────────────────────────────
 install_docker_linux_debian() {
    $SUDO apt-get update -q
@ -168,27 +139,6 @@ check_compose() {
    fi
 }
 # ── Docker daemon health check ──────────────────────────────────────────────────
 # check_docker_running — report whether the Docker daemon answers `docker info`
 # and, on Linux hosts with systemctl, make one attempt to start it.
 #
 # Reads:    OS (compared against "Linux" and "Darwin"), SUDO (privilege prefix).
 # Uses:     success, info, warn — helpers defined outside this hunk.
 # Returns:  no explicit failure status; problems are reported through `warn`
 #           only, so the function's exit status is that of its last command.
 check_docker_running() {
    # `docker info` is used as the liveness probe; all of its output is discarded.
    if docker info &>/dev/null 2>&1; then
        success "Docker daemon is running."
        return
    fi
    warn "Docker daemon is not responding."
    if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
        info "Starting Docker service…"
        # Best effort: a failing `systemctl start` is swallowed by `|| true`;
        # the re-probe below decides what gets reported.
        $SUDO systemctl start docker 2>/dev/null || true
        # Presumably gives the daemon a moment to come up before re-probing.
        sleep 2
        if docker info &>/dev/null 2>&1; then
            success "Docker daemon started."
        else
            warn "Docker failed to start. Run: sudo systemctl start docker"
        fi
    elif [[ "$OS" == "Darwin" ]]; then
        # No start attempt on macOS — the user is asked to launch Docker Desktop.
        warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
    fi
    # Any other OS value falls through with only the "not responding" warning.
 }
 # ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
 install_nvidia_toolkit() {
    [[ "$OS" != "Linux" ]] && return   # macOS has no NVIDIA support
@ -196,8 +146,8 @@ install_nvidia_toolkit() {
        info "No NVIDIA GPU detected — skipping Container Toolkit."
        return
    fi
-    if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null 2>&1; then
+    if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
-        success "NVIDIA Container Toolkit already configured."
+        success "NVIDIA Container Toolkit already working."
        return
    fi
    info "NVIDIA GPU detected. Installing Container Toolkit…"
@ -226,8 +176,6 @@ install_nvidia_toolkit() {
 }
 # ── Environment setup ──────────────────────────────────────────────────────────
 # Note: Ollama runs as a Docker container — the compose.yml ollama service
 # handles model download automatically on first start (see docker/ollama/entrypoint.sh).
 setup_env() {
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
@ -238,88 +186,29 @@ setup_env() {
    fi
 }
 # ── Model weights storage ───────────────────────────────────────────────────────
 # _update_env_key FILE KEY VALUE — set KEY=VALUE in a .env-style file.
 #
 # Arguments:
 #   $1  file  path of the file to rewrite
 #   $2  key   variable name; every line starting with "KEY=" is replaced
 #   $3  val   new value, written verbatim after the "="
 #
 # The awk program copies all other lines through unchanged and appends
 # "KEY=VALUE" at the end when no line matched. Output goes to "FILE.tmp",
 # which replaces FILE only if awk exits successfully (`&&`).
 #
 # NOTE(review): the key is concatenated into an awk regex ("^" k "="), so regex
 # metacharacters in KEY would be interpreted as such; the two callers visible in
 # this hunk pass plain identifiers. Values passed through `awk -v` are also
 # subject to awk's escape-sequence processing — confirm that backslashes in
 # paths are not a concern.
 _update_env_key() {
    # Portable in-place key=value update for .env files (Linux + macOS).
    # Appends the key if not already present.
    local file="$1" key="$2" val="$3"
    awk -v k="$key" -v v="$val" '
        BEGIN { found=0 }
        $0 ~ ("^" k "=") { print k "=" v; found=1; next }
        { print }
        END { if (!found) print k "=" v }
    ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
 }
 # configure_model_paths — interactively ask where Ollama and vLLM model weights
 # should live and persist the answers in the .env file next to this script.
 #
 # For each of OLLAMA_MODELS_DIR and VLLM_MODELS_DIR the function:
 #   1. reads the current value from .env (falling back to a ~/models/... default),
 #   2. prompts with that value shown in brackets (empty input keeps it),
 #   3. expands a leading "~" to $HOME,
 #   4. tries to create the directory (a failure only produces a warning),
 #   5. writes the result back with _update_env_key.
 #
 # Takes no arguments. Returns early, changing nothing, when stdin is not a
 # terminal.
 # Uses:  info, warn, success — helpers defined outside this hunk.
 configure_model_paths() {
    local env_file
    # Resolve .env relative to the script's own directory, not the caller's cwd.
    env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
    # Skip prompts when stdin is not a terminal (e.g. curl | bash)
    if [[ ! -t 0 ]]; then
        info "Non-interactive — using default model paths from .env"
        return
    fi
    echo ""
    info "Model weights storage"
    echo -e "  AI models can be 2–30+ GB each. If you have a separate data drive,"
    echo -e "  point these at it now. Press Enter to keep the value shown in [brackets]."
    echo ""
    local current input
    # ── Ollama ──
    # `cut -d= -f2-` keeps everything after the first "=", so values that
    # themselves contain "=" are preserved.
    current="$(grep -E '^OLLAMA_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
    [[ -z "$current" ]] && current="~/models/ollama"
    # If `read` fails (e.g. EOF), fall back to an empty answer, which selects
    # the current value on the next line.
    read -rp "  Ollama models dir [${current}]: " input || input=""
    input="${input:-$current}"
    # Replace a leading "~" with $HOME; the tilde is literal here because it
    # came from a string, not from shell tilde expansion.
    input="${input/#\~/$HOME}"
    mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
    _update_env_key "$env_file" "OLLAMA_MODELS_DIR" "$input"
    success "OLLAMA_MODELS_DIR=$input"
    # ── vLLM ── (same steps as above, with a different key and default)
    current="$(grep -E '^VLLM_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
    [[ -z "$current" ]] && current="~/models/vllm"
    read -rp "  vLLM models dir   [${current}]: " input || input=""
    input="${input:-$current}"
    input="${input/#\~/$HOME}"
    mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
    _update_env_key "$env_file" "VLLM_MODELS_DIR" "$input"
    success "VLLM_MODELS_DIR=$input"
    echo ""
 }
 # ── Main ───────────────────────────────────────────────────────────────────────
 main() {
    echo ""
-    echo -e "${BLUE}╔══════════════════════════════════════════════════════╗${NC}"
+    echo -e "${BLUE}╔══════════════════════════════════════════╗${NC}"
    echo -e "${BLUE}║   Peregrine — Dependency Installer       ║${NC}"
    echo -e "${BLUE}║   by Circuit Forge LLC                   ║${NC}"
-    echo -e "${BLUE}║   \"Don't be evil, for real and forever.\"             ║${NC}"
+    echo -e "${BLUE}╚══════════════════════════════════════════╝${NC}"
    echo -e "${BLUE}╚══════════════════════════════════════════════════════╝${NC}"
    echo ""
    install_git
    # Podman takes precedence if already installed; otherwise install Docker
    if ! check_podman; then
    install_docker
        check_docker_running
    check_compose
    install_nvidia_toolkit
    fi
    setup_env
    configure_model_paths
    echo ""
    success "All dependencies installed."
    echo ""
    echo -e "  ${GREEN}Next steps:${NC}"
-    echo -e "  1. Start Peregrine:"
+    echo -e "  1. Edit ${YELLOW}.env${NC} to set your preferred ports and model paths"
-    echo -e "     ${YELLOW}make start${NC}             # remote/API-only (no local GPU)"
+    echo -e "  2. Start Peregrine:"
-    echo -e "     ${YELLOW}make start PROFILE=cpu${NC} # local Ollama inference (CPU)"
+    echo -e "     ${YELLOW}docker compose --profile remote up -d${NC}"
-    echo -e "  2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
+    echo -e "  3. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
    echo -e "  (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
    echo ""
    if groups "$USER" 2>/dev/null | grep -q docker; then
        true