Compare commits
10 commits
a7d9bd075a
...
006738f7b3
| Author | SHA1 | Date | |
|---|---|---|---|
| 006738f7b3 | |||
| dc4a08c063 | |||
| 4d66c04d1e | |||
| 6c895b5a9b | |||
| 1bcbff395d | |||
| f55f7b78fc | |||
| 9a4db4e18c | |||
| 71e30be9b9 | |||
| 23b0703485 | |||
| 0bc17a1d84 |
20 changed files with 4127 additions and 111 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -22,3 +22,8 @@ config/user.yaml
|
|||
config/.backup-*
|
||||
config/integrations/*.yaml
|
||||
!config/integrations/*.yaml.example
|
||||
|
||||
# companyScraper runtime artifacts
|
||||
scrapers/.cache/
|
||||
scrapers/.debug/
|
||||
scrapers/raw_scrapes/
|
||||
|
|
|
|||
38
Dockerfile.finetune
Normal file
38
Dockerfile.finetune
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
|
||||
# Large image (~12-15 GB after build). Built once, cached on rebuilds.
|
||||
# GPU strongly recommended. CPU fallback works but training is very slow.
|
||||
#
|
||||
# Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
|
||||
# If your GPU requires a different CUDA version, change the FROM line and
|
||||
# reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121).
|
||||
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Build tools needed by bitsandbytes CUDA kernels and unsloth
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc g++ git libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install training stack.
|
||||
# unsloth detects CUDA version automatically from the base image.
|
||||
RUN pip install --no-cache-dir \
|
||||
"unsloth @ git+https://github.com/unslothai/unsloth.git" \
|
||||
"datasets>=2.18" "trl>=0.8" peft transformers \
|
||||
"bitsandbytes>=0.43.0" accelerate sentencepiece \
|
||||
requests pyyaml
|
||||
|
||||
COPY scripts/ /app/scripts/
|
||||
COPY config/ /app/config/
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
# Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
|
||||
ENV CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
# Runtime env vars injected by compose.yml:
|
||||
# OLLAMA_URL — Ollama API base (default: http://ollama:11434)
|
||||
# OLLAMA_MODELS_MOUNT — finetune container's mount path for ollama models volume
|
||||
# OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
|
||||
# DOCS_DIR — cover letters + training data root (default: /docs)
|
||||
|
||||
ENTRYPOINT ["python", "scripts/finetune_local.py"]
|
||||
46
Makefile
46
Makefile
|
|
@ -1,36 +1,66 @@
|
|||
# Makefile — Peregrine convenience targets
|
||||
# Usage: make <target>
|
||||
|
||||
.PHONY: setup preflight start stop restart logs test clean help
|
||||
.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
|
||||
|
||||
PROFILE ?= remote
|
||||
PYTHON ?= python3
|
||||
|
||||
setup: ## Install dependencies (Docker, NVIDIA toolkit)
|
||||
# Auto-detect container engine: prefer docker compose, fall back to podman
|
||||
COMPOSE ?= $(shell \
|
||||
command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
|
||||
&& echo "docker compose" \
|
||||
|| (command -v podman >/dev/null 2>&1 \
|
||||
&& podman compose version >/dev/null 2>&1 \
|
||||
&& echo "podman compose" \
|
||||
|| echo "podman-compose"))
|
||||
|
||||
# GPU profiles require an overlay for NVIDIA device reservations.
|
||||
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
|
||||
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||
COMPOSE_FILES := -f compose.yml
|
||||
ifneq (,$(findstring podman,$(COMPOSE)))
|
||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
|
||||
endif
|
||||
else
|
||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
|
||||
endif
|
||||
endif
|
||||
|
||||
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
||||
@bash setup.sh
|
||||
|
||||
preflight: ## Check ports + system resources; write .env
|
||||
@$(PYTHON) scripts/preflight.py
|
||||
|
||||
start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
|
||||
docker compose --profile $(PROFILE) up -d
|
||||
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
|
||||
|
||||
stop: ## Stop all Peregrine services
|
||||
docker compose down
|
||||
$(COMPOSE) down
|
||||
|
||||
restart: preflight ## Preflight check then restart all services
|
||||
docker compose down && docker compose --profile $(PROFILE) up -d
|
||||
$(COMPOSE) down && $(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
|
||||
|
||||
logs: ## Tail app logs
|
||||
docker compose logs -f app
|
||||
$(COMPOSE) logs -f app
|
||||
|
||||
test: ## Run the test suite
|
||||
$(PYTHON) -m pytest tests/ -v
|
||||
@$(PYTHON) -m pytest tests/ -v
|
||||
|
||||
prepare-training: ## Scan docs_dir for cover letters and build training JSONL
|
||||
$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
|
||||
|
||||
finetune: ## Fine-tune your personal cover letter model (run prepare-training first)
|
||||
@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
|
||||
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
|
||||
|
||||
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
||||
@echo "WARNING: This will delete all Peregrine containers and data."
|
||||
@read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ]
|
||||
docker compose down --rmi local --volumes
|
||||
$(COMPOSE) down --rmi local --volumes
|
||||
|
||||
help: ## Show this help
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
|
||||
|
||||
> *"Don't be evil, for real and forever."*
|
||||
|
||||
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
|
||||
Privacy-first, local-first. Your data never leaves your machine.
|
||||
|
||||
|
|
|
|||
|
|
@ -403,9 +403,9 @@ elif step == 5:
|
|||
st.caption("Change only if services run on non-default ports or remote hosts.")
|
||||
svc = dict(saved_yaml.get("services", {}))
|
||||
for svc_name, default_host, default_port in [
|
||||
("ollama", "localhost", 11434),
|
||||
("vllm", "localhost", 8000),
|
||||
("searxng", "localhost", 8888),
|
||||
("ollama", "ollama", 11434), # Docker service name
|
||||
("vllm", "vllm", 8000), # Docker service name
|
||||
("searxng", "searxng", 8080), # Docker internal port (host-mapped: 8888)
|
||||
]:
|
||||
c1, c2 = st.columns([3, 1])
|
||||
svc[f"{svc_name}_host"] = c1.text_input(
|
||||
|
|
|
|||
|
|
@ -1026,9 +1026,10 @@ with tab_finetune:
|
|||
|
||||
if ft_step == 1:
|
||||
st.markdown("**Step 1: Upload Cover Letters**")
|
||||
st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
|
||||
uploaded = st.file_uploader(
|
||||
"Upload cover letters (PDF, DOCX, or TXT)",
|
||||
type=["pdf", "docx", "txt"],
|
||||
"Upload cover letters (.md or .txt)",
|
||||
type=["md", "txt"],
|
||||
accept_multiple_files=True,
|
||||
)
|
||||
if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
|
||||
|
|
@ -1040,18 +1041,45 @@ with tab_finetune:
|
|||
st.rerun()
|
||||
|
||||
elif ft_step == 2:
|
||||
st.markdown("**Step 2: Preview Training Pairs**")
|
||||
st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
|
||||
st.markdown("**Step 2: Extract Training Pairs**")
|
||||
import json as _json
|
||||
import sqlite3 as _sqlite3
|
||||
from scripts.db import DEFAULT_DB as _FT_DB
|
||||
|
||||
jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
|
||||
|
||||
# Show task status
|
||||
_ft_conn = _sqlite3.connect(_FT_DB)
|
||||
_ft_conn.row_factory = _sqlite3.Row
|
||||
_ft_task = _ft_conn.execute(
|
||||
"SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
|
||||
).fetchone()
|
||||
_ft_conn.close()
|
||||
|
||||
if _ft_task:
|
||||
_ft_status = _ft_task["status"]
|
||||
if _ft_status == "completed":
|
||||
st.success(f"✅ {_ft_task['error'] or 'Extraction complete'}")
|
||||
elif _ft_status in ("running", "queued"):
|
||||
st.info(f"⏳ {_ft_status.capitalize()}… refresh to check progress.")
|
||||
elif _ft_status == "failed":
|
||||
st.error(f"Extraction failed: {_ft_task['error']}")
|
||||
|
||||
if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
|
||||
from scripts.task_runner import submit_task as _ft_submit
|
||||
_ft_submit(_FT_DB, "prepare_training", 0)
|
||||
st.info("Extracting in the background — refresh in a moment.")
|
||||
st.rerun()
|
||||
|
||||
if jsonl_path.exists():
|
||||
import json as _json
|
||||
pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
|
||||
st.caption(f"{len(pairs)} training pairs extracted.")
|
||||
st.caption(f"{len(pairs)} training pairs ready.")
|
||||
for i, p in enumerate(pairs[:3]):
|
||||
with st.expander(f"Pair {i+1}"):
|
||||
st.text(p.get("input", "")[:300])
|
||||
st.text(p.get("output", p.get("input", ""))[:300])
|
||||
else:
|
||||
st.warning("No training pairs found. Run `prepare_training_data.py` first.")
|
||||
st.caption("No training pairs yet — click Extract above.")
|
||||
|
||||
col_back, col_next = st.columns([1, 4])
|
||||
if col_back.button("← Back", key="ft_back2"):
|
||||
st.session_state.ft_step = 1
|
||||
|
|
@ -1061,13 +1089,45 @@ with tab_finetune:
|
|||
st.rerun()
|
||||
|
||||
elif ft_step == 3:
|
||||
st.markdown("**Step 3: Train**")
|
||||
st.slider("Epochs", 3, 20, 10, key="ft_epochs")
|
||||
if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
|
||||
st.info("Fine-tune queued as a background task. Check back in 30–60 minutes.")
|
||||
if st.button("← Back", key="ft_back3"):
|
||||
st.markdown("**Step 3: Fine-Tune**")
|
||||
|
||||
_ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower()
|
||||
if _profile else "cover")
|
||||
_ft_model_name = f"{_ft_profile_name}-cover-writer"
|
||||
|
||||
st.info(
|
||||
"Run the command below from your terminal. Training takes 30–90 min on GPU "
|
||||
"and registers the model automatically when complete."
|
||||
)
|
||||
st.code("make finetune PROFILE=single-gpu", language="bash")
|
||||
st.caption(
|
||||
f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
|
||||
"Cover letter generation will use it automatically."
|
||||
)
|
||||
|
||||
st.markdown("**Model status:**")
|
||||
try:
|
||||
import os as _os
|
||||
import requests as _ft_req
|
||||
_ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
_tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
|
||||
if _tags.status_code == 200:
|
||||
_model_names = [m["name"] for m in _tags.json().get("models", [])]
|
||||
if any(_ft_model_name in m for m in _model_names):
|
||||
st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
|
||||
else:
|
||||
st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
|
||||
else:
|
||||
st.caption("Ollama returned an unexpected response.")
|
||||
except Exception:
|
||||
st.caption("Could not reach Ollama — ensure services are running with `make start`.")
|
||||
|
||||
col_back, col_refresh = st.columns([1, 3])
|
||||
if col_back.button("← Back", key="ft_back3"):
|
||||
st.session_state.ft_step = 2
|
||||
st.rerun()
|
||||
if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
|
||||
st.rerun()
|
||||
|
||||
# ── Developer tab ─────────────────────────────────────────────────────────────
|
||||
if _show_dev_tab:
|
||||
|
|
|
|||
46
compose.gpu.yml
Normal file
46
compose.gpu.yml
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# compose.gpu.yml — Docker NVIDIA GPU overlay
|
||||
#
|
||||
# Adds NVIDIA GPU reservations to Peregrine services.
|
||||
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
|
||||
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
|
||||
#
|
||||
# Prerequisites:
|
||||
# sudo nvidia-ctk runtime configure --runtime=docker
|
||||
# sudo systemctl restart docker
|
||||
#
|
||||
services:
|
||||
ollama:
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["0"]
|
||||
capabilities: [gpu]
|
||||
|
||||
vision:
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["0"]
|
||||
capabilities: [gpu]
|
||||
|
||||
vllm:
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["1"]
|
||||
capabilities: [gpu]
|
||||
|
||||
finetune:
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["0"]
|
||||
capabilities: [gpu]
|
||||
43
compose.podman-gpu.yml
Normal file
43
compose.podman-gpu.yml
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# compose.podman-gpu.yml — Podman GPU override
|
||||
#
|
||||
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
|
||||
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
|
||||
# when podman/podman-compose is detected, or manually:
|
||||
# podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
|
||||
#
|
||||
# Prerequisites:
|
||||
# sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||
# (requires nvidia-container-toolkit >= 1.14)
|
||||
#
|
||||
services:
|
||||
ollama:
|
||||
devices:
|
||||
- nvidia.com/gpu=0
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices: []
|
||||
|
||||
vision:
|
||||
devices:
|
||||
- nvidia.com/gpu=0
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices: []
|
||||
|
||||
vllm:
|
||||
devices:
|
||||
- nvidia.com/gpu=1
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices: []
|
||||
|
||||
finetune:
|
||||
devices:
|
||||
- nvidia.com/gpu=0
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices: []
|
||||
46
compose.yml
46
compose.yml
|
|
@ -12,6 +12,7 @@ services:
|
|||
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||
environment:
|
||||
- STAGING_DB=/app/data/staging.db
|
||||
- DOCS_DIR=/docs
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
||||
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||
|
|
@ -47,18 +48,6 @@ services:
|
|||
profiles: [cpu, single-gpu, dual-gpu]
|
||||
restart: unless-stopped
|
||||
|
||||
ollama-gpu:
|
||||
extends:
|
||||
service: ollama
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["0"]
|
||||
capabilities: [gpu]
|
||||
profiles: [single-gpu, dual-gpu]
|
||||
|
||||
vision:
|
||||
build:
|
||||
context: .
|
||||
|
|
@ -68,13 +57,6 @@ services:
|
|||
environment:
|
||||
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
|
||||
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["0"]
|
||||
capabilities: [gpu]
|
||||
profiles: [single-gpu, dual-gpu]
|
||||
restart: unless-stopped
|
||||
|
||||
|
|
@ -92,12 +74,24 @@ services:
|
|||
--enforce-eager
|
||||
--max-num-seqs 8
|
||||
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ["1"]
|
||||
capabilities: [gpu]
|
||||
profiles: [dual-gpu]
|
||||
restart: unless-stopped
|
||||
|
||||
finetune:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.finetune
|
||||
volumes:
|
||||
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||
- ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
|
||||
- ./config:/app/config
|
||||
environment:
|
||||
- DOCS_DIR=/docs
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
- OLLAMA_MODELS_MOUNT=/ollama-models
|
||||
- OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
|
||||
depends_on:
|
||||
ollama:
|
||||
condition: service_started
|
||||
profiles: [cpu, single-gpu, dual-gpu]
|
||||
restart: "no"
|
||||
|
|
|
|||
|
|
@ -21,26 +21,26 @@ backends:
|
|||
type: openai_compat
|
||||
ollama:
|
||||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
base_url: http://ollama:11434/v1
|
||||
enabled: true
|
||||
model: meghan-cover-writer:latest
|
||||
model: llama3.2:3b
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
ollama_research:
|
||||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
base_url: http://ollama:11434/v1
|
||||
enabled: true
|
||||
model: llama3.1:8b
|
||||
model: llama3.2:3b
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
vision_service:
|
||||
base_url: http://localhost:8002
|
||||
base_url: http://vision:8002
|
||||
enabled: true
|
||||
supports_images: true
|
||||
type: vision_service
|
||||
vllm:
|
||||
api_key: ''
|
||||
base_url: http://localhost:8000/v1
|
||||
base_url: http://vllm:8000/v1
|
||||
enabled: true
|
||||
model: __auto__
|
||||
supports_images: false
|
||||
|
|
|
|||
|
|
@ -21,21 +21,21 @@ backends:
|
|||
supports_images: false
|
||||
ollama:
|
||||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker
|
||||
enabled: true
|
||||
model: meghan-cover-writer:latest
|
||||
model: llama3.2:3b
|
||||
type: openai_compat
|
||||
supports_images: false
|
||||
ollama_research:
|
||||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker
|
||||
enabled: true
|
||||
model: llama3.1:8b
|
||||
model: llama3.2:3b
|
||||
type: openai_compat
|
||||
supports_images: false
|
||||
vllm:
|
||||
api_key: ''
|
||||
base_url: http://localhost:8000/v1
|
||||
base_url: http://vllm:8000/v1 # Docker service name; use localhost:8000 outside Docker
|
||||
enabled: true
|
||||
model: __auto__
|
||||
type: openai_compat
|
||||
|
|
|
|||
|
|
@ -44,15 +44,15 @@ inference_profile: "remote" # remote | cpu | single-gpu | dual-gpu
|
|||
|
||||
services:
|
||||
streamlit_port: 8501
|
||||
ollama_host: localhost
|
||||
ollama_host: ollama # Docker service name; use "localhost" if running outside Docker
|
||||
ollama_port: 11434
|
||||
ollama_ssl: false
|
||||
ollama_ssl_verify: true
|
||||
vllm_host: localhost
|
||||
vllm_host: vllm # Docker service name; use "localhost" if running outside Docker
|
||||
vllm_port: 8000
|
||||
vllm_ssl: false
|
||||
vllm_ssl_verify: true
|
||||
searxng_host: localhost
|
||||
searxng_port: 8888
|
||||
searxng_host: searxng # Docker service name; use "localhost" if running outside Docker
|
||||
searxng_port: 8080 # internal Docker port; use 8888 for host-mapped access
|
||||
searxng_ssl: false
|
||||
searxng_ssl_verify: true
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ Unscheduled ideas and deferred features. Roughly grouped by area.
|
|||
|
||||
## Container Runtime
|
||||
|
||||
- **Podman support** — Update `Makefile` to auto-detect `docker compose` vs `podman-compose` (e.g. `COMPOSE ?= $(shell command -v docker 2>/dev/null && echo "docker compose" || echo "podman-compose")`). Note in README that rootless Podman requires CDI GPU device spec (`nvidia.com/gpu=all`) instead of `runtime: nvidia` in `compose.yml`.
|
||||
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
|
||||
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
|
||||
|
||||
---
|
||||
|
|
|
|||
367
docs/plans/2026-02-25-circuitforge-license-design.md
Normal file
367
docs/plans/2026-02-25-circuitforge-license-design.md
Normal file
|
|
@ -0,0 +1,367 @@
|
|||
# CircuitForge License Server — Design Document
|
||||
|
||||
**Date:** 2026-02-25
|
||||
**Status:** Approved — ready for implementation
|
||||
|
||||
---
|
||||
|
||||
## Goal
|
||||
|
||||
Build a self-hosted licensing server for Circuit Forge LLC products. v1 serves Peregrine; schema is multi-product from day one. Enforces free / paid / premium / ultra tier gates with offline-capable JWT validation, 30-day refresh cycle, 7-day grace period, seat tracking, usage telemetry, and a content violation flagging foundation.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ circuitforge-license (Heimdall:8600) │
|
||||
│ FastAPI + SQLite + RS256 JWT │
|
||||
│ │
|
||||
│ Public API (/v1/…): │
|
||||
│ POST /v1/activate → issue JWT │
|
||||
│ POST /v1/refresh → renew JWT │
|
||||
│ POST /v1/deactivate → free a seat │
|
||||
│ POST /v1/usage → record usage event │
|
||||
│ POST /v1/flag → report violation │
|
||||
│ │
|
||||
│ Admin API (/admin/…, bearer token): │
|
||||
│ POST/GET /admin/keys → CRUD keys │
|
||||
│ DELETE /admin/keys/{id} → revoke │
|
||||
│ GET /admin/activations → audit │
|
||||
│ GET /admin/usage → telemetry │
|
||||
│ GET/PATCH /admin/flags → flag review │
|
||||
└─────────────────────────────────────────────────┘
|
||||
↑ HTTPS via Caddy (license.circuitforge.com)
|
||||
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Peregrine (user's machine) │
|
||||
│ scripts/license.py │
|
||||
│ │
|
||||
│ activate(key) → POST /v1/activate │
|
||||
│ writes config/license.json │
|
||||
│ verify_local() → validates JWT offline │
|
||||
│ using embedded public key │
|
||||
│ refresh_if_needed() → called on app startup │
|
||||
│ effective_tier() → tier string for can_use() │
|
||||
│ report_usage(…) → fire-and-forget telemetry │
|
||||
│ report_flag(…) → fire-and-forget violation │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Key properties:**
|
||||
- Peregrine verifies tier **offline** on every check — RS256 public key embedded at build time
|
||||
- Network required only at activation and 30-day refresh
|
||||
- Revoked keys stop working at next refresh cycle (≤30 day lag — acceptable for v1)
|
||||
- `config/license.json` gitignored; missing = free tier
|
||||
|
||||
---
|
||||
|
||||
## Crypto: RS256 (asymmetric JWT)
|
||||
|
||||
- **Private key** — lives only on the license server (`keys/private.pem`, gitignored)
|
||||
- **Public key** — committed to both the license server repo and Peregrine (`scripts/license_public_key.pem`)
|
||||
- Peregrine can verify JWT authenticity without ever knowing the private key
|
||||
- A stolen JWT cannot be forged without the private key
|
||||
- Revocation: server refuses refresh; old JWT valid until expiry then grace period expires
|
||||
|
||||
**Key generation (one-time, on Heimdall):**
|
||||
```bash
|
||||
openssl genrsa -out keys/private.pem 2048
|
||||
openssl rsa -in keys/private.pem -pubout -out keys/public.pem
|
||||
# copy keys/public.pem → peregrine/scripts/license_public_key.pem
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Database Schema
|
||||
|
||||
```sql
|
||||
CREATE TABLE license_keys (
|
||||
id TEXT PRIMARY KEY, -- UUID
|
||||
key_display TEXT UNIQUE NOT NULL, -- CFG-PRNG-XXXX-XXXX-XXXX
|
||||
product TEXT NOT NULL, -- peregrine | falcon | osprey | …
|
||||
tier TEXT NOT NULL, -- paid | premium | ultra
|
||||
seats INTEGER DEFAULT 1,
|
||||
valid_until TEXT, -- ISO date or NULL (perpetual)
|
||||
revoked INTEGER DEFAULT 0,
|
||||
customer_email TEXT, -- proper field, not buried in notes
|
||||
source TEXT DEFAULT 'manual', -- manual | beta | promo | stripe
|
||||
trial INTEGER DEFAULT 0, -- 1 = time-limited trial key
|
||||
notes TEXT,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE activations (
|
||||
id TEXT PRIMARY KEY,
|
||||
key_id TEXT NOT NULL REFERENCES license_keys(id),
|
||||
machine_id TEXT NOT NULL, -- sha256(hostname + MAC)
|
||||
app_version TEXT, -- Peregrine version at last refresh
|
||||
platform TEXT, -- linux | macos | windows | docker
|
||||
activated_at TEXT NOT NULL,
|
||||
last_refresh TEXT NOT NULL,
|
||||
deactivated_at TEXT -- NULL = still active
|
||||
);
|
||||
|
||||
CREATE TABLE usage_events (
|
||||
id TEXT PRIMARY KEY,
|
||||
key_id TEXT NOT NULL REFERENCES license_keys(id),
|
||||
machine_id TEXT NOT NULL,
|
||||
product TEXT NOT NULL,
|
||||
event_type TEXT NOT NULL, -- cover_letter_generated |
|
||||
-- company_research | email_sync |
|
||||
-- interview_prep | survey | etc.
|
||||
metadata TEXT, -- JSON blob for context
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE flags (
|
||||
id TEXT PRIMARY KEY,
|
||||
key_id TEXT NOT NULL REFERENCES license_keys(id),
|
||||
machine_id TEXT,
|
||||
product TEXT NOT NULL,
|
||||
flag_type TEXT NOT NULL, -- content_violation | tos_violation |
|
||||
-- abuse | manual
|
||||
details TEXT, -- JSON: prompt snippet, output excerpt
|
||||
status TEXT DEFAULT 'open', -- open | reviewed | dismissed | actioned
|
||||
created_at TEXT NOT NULL,
|
||||
reviewed_at TEXT,
|
||||
action_taken TEXT -- none | warned | revoked
|
||||
);
|
||||
|
||||
CREATE TABLE audit_log (
|
||||
id TEXT PRIMARY KEY,
|
||||
entity_type TEXT NOT NULL, -- key | activation | flag
|
||||
entity_id TEXT NOT NULL,
|
||||
action TEXT NOT NULL, -- created | revoked | activated |
|
||||
-- deactivated | flag_actioned
|
||||
actor TEXT, -- admin identifier (future multi-admin)
|
||||
details TEXT, -- JSON
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
```
|
||||
|
||||
**Flags scope (v1):** Schema and `POST /v1/flag` endpoint capture data. No admin enforcement UI in v1 — query DB directly. Build review UI in v2 when there's data to act on.
|
||||
|
||||
---
|
||||
|
||||
## JWT Payload
|
||||
|
||||
```json
|
||||
{
|
||||
"sub": "CFG-PRNG-A1B2-C3D4-E5F6",
|
||||
"product": "peregrine",
|
||||
"tier": "paid",
|
||||
"seats": 2,
|
||||
"machine": "a3f9c2…",
|
||||
"notice": "Version 1.1 available — see circuitforge.com/update",
|
||||
"iat": 1740000000,
|
||||
"exp": 1742592000
|
||||
}
|
||||
```
|
||||
|
||||
`notice` is optional — set via a server config value; included in refresh responses so Peregrine can surface it as a banner. No DB table needed.
|
||||
|
||||
---
|
||||
|
||||
## Key Format
|
||||
|
||||
`CFG-PRNG-A1B2-C3D4-E5F6`
|
||||
|
||||
- `CFG` — Circuit Forge
|
||||
- `PRNG` / `FLCN` / `OSPY` / … — 4-char product code
|
||||
- Three random 4-char alphanumeric segments
|
||||
- Human-readable, easy to copy/paste into a support email
|
||||
|
||||
---
|
||||
|
||||
## Endpoint Reference
|
||||
|
||||
| Method | Path | Auth | Purpose |
|
||||
|--------|------|------|---------|
|
||||
| POST | `/v1/activate` | none | Issue JWT for key + machine |
|
||||
| POST | `/v1/refresh` | JWT bearer | Renew JWT before expiry |
|
||||
| POST | `/v1/deactivate` | JWT bearer | Free a seat |
|
||||
| POST | `/v1/usage` | JWT bearer | Record usage event (fire-and-forget) |
|
||||
| POST | `/v1/flag` | JWT bearer | Report content/ToS violation |
|
||||
| POST | `/admin/keys` | admin token | Create a new key |
|
||||
| GET | `/admin/keys` | admin token | List all keys + activation counts |
|
||||
| DELETE | `/admin/keys/{id}` | admin token | Revoke a key |
|
||||
| GET | `/admin/activations` | admin token | Full activation audit |
|
||||
| GET | `/admin/usage` | admin token | Usage breakdown per key/product/event |
|
||||
| GET | `/admin/flags` | admin token | List flags (open by default) |
|
||||
| PATCH | `/admin/flags/{id}` | admin token | Update flag status + action |
|
||||
|
||||
---
|
||||
|
||||
## Peregrine Client (`scripts/license.py`)
|
||||
|
||||
**Public API:**
|
||||
```python
|
||||
def activate(key: str) -> dict # POST /v1/activate, writes license.json
|
||||
def verify_local() -> dict | None # validates JWT offline; None = free tier
|
||||
def refresh_if_needed() -> None # silent; called on app startup
|
||||
def effective_tier() -> str # "free"|"paid"|"premium"|"ultra"
|
||||
def report_usage(event_type: str, # fire-and-forget; failures silently dropped
|
||||
metadata: dict = {}) -> None
|
||||
def report_flag(flag_type: str, # fire-and-forget
|
||||
details: dict) -> None
|
||||
```
|
||||
|
||||
**`effective_tier()` decision tree:**
|
||||
```
|
||||
license.json missing or unreadable → "free"
|
||||
JWT signature invalid → "free"
|
||||
JWT product != "peregrine" → "free"
|
||||
JWT not expired → tier from payload
|
||||
JWT expired, within grace period → tier from payload + show banner
|
||||
JWT expired, grace period expired → "free" + show banner
|
||||
```
|
||||
|
||||
**`config/license.json` (gitignored):**
|
||||
```json
|
||||
{
|
||||
"jwt": "eyJ…",
|
||||
"key_display": "CFG-PRNG-A1B2-C3D4-E5F6",
|
||||
"tier": "paid",
|
||||
"valid_until": "2026-03-27",
|
||||
"machine_id": "a3f9c2…",
|
||||
"last_refresh": "2026-02-25T12:00:00Z",
|
||||
"grace_until": null
|
||||
}
|
||||
```
|
||||
|
||||
**Integration point in `tiers.py`:**
|
||||
```python
|
||||
def effective_tier(profile) -> str:
|
||||
from scripts.license import effective_tier as _license_tier
|
||||
if profile.dev_tier_override: # dev override still works in dev mode
|
||||
return profile.dev_tier_override
|
||||
return _license_tier()
|
||||
```
|
||||
|
||||
**Settings License tab** (new tab in `app/pages/2_Settings.py`):
|
||||
- Text input: enter license key → calls `activate()` → shows result
|
||||
- If active: tier badge, key display string, expiry date, seat count
|
||||
- Grace period: amber banner with days remaining
|
||||
- "Deactivate this machine" button → `/v1/deactivate`, deletes `license.json`
|
||||
|
||||
---
|
||||
|
||||
## Deployment
|
||||
|
||||
**Repo:** `git.opensourcesolarpunk.com/pyr0ball/circuitforge-license` (private)
|
||||
|
||||
**Repo layout:**
|
||||
```
|
||||
circuitforge-license/
|
||||
├── app/
|
||||
│ ├── main.py # FastAPI app
|
||||
│ ├── db.py # SQLite helpers, schema init
|
||||
│ ├── models.py # Pydantic models
|
||||
│ ├── crypto.py # RSA sign/verify helpers
|
||||
│ └── routes/
|
||||
│ ├── public.py # /v1/* endpoints
|
||||
│ └── admin.py # /admin/* endpoints
|
||||
├── data/ # SQLite DB (named volume)
|
||||
├── keys/
|
||||
│ ├── private.pem # gitignored
|
||||
│ └── public.pem # committed
|
||||
├── scripts/
|
||||
│ └── issue-key.sh # curl wrapper for key issuance
|
||||
├── tests/
|
||||
├── Dockerfile
|
||||
├── docker-compose.yml
|
||||
├── .env.example
|
||||
└── requirements.txt
|
||||
```
|
||||
|
||||
**`docker-compose.yml` (on Heimdall):**
|
||||
```yaml
|
||||
services:
|
||||
license:
|
||||
build: .
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "127.0.0.1:8600:8600"
|
||||
volumes:
|
||||
- license_data:/app/data
|
||||
- ./keys:/app/keys:ro
|
||||
env_file: .env
|
||||
|
||||
volumes:
|
||||
license_data:
|
||||
```
|
||||
|
||||
**`.env` (gitignored):**
|
||||
```
|
||||
ADMIN_TOKEN=<long random string>
|
||||
JWT_PRIVATE_KEY_PATH=/app/keys/private.pem
|
||||
JWT_PUBLIC_KEY_PATH=/app/keys/public.pem
|
||||
JWT_EXPIRY_DAYS=30
|
||||
GRACE_PERIOD_DAYS=7
|
||||
```
|
||||
|
||||
**Caddy block (add to Heimdall Caddyfile):**
|
||||
```caddy
|
||||
license.circuitforge.com {
|
||||
reverse_proxy localhost:8600
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Admin Workflow (v1)
|
||||
|
||||
All operations via `curl` or `scripts/issue-key.sh`:
|
||||
|
||||
```bash
|
||||
# Issue a key
|
||||
./scripts/issue-key.sh --product peregrine --tier paid --seats 2 \
|
||||
--email user@example.com --notes "Beta — manual payment 2026-02-25"
|
||||
# → CFG-PRNG-A1B2-C3D4-E5F6 (email to customer)
|
||||
|
||||
# List all keys
|
||||
curl https://license.circuitforge.com/admin/keys \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN"
|
||||
|
||||
# Revoke a key
|
||||
curl -X DELETE https://license.circuitforge.com/admin/keys/{id} \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
**License server:**
|
||||
- pytest with in-memory SQLite and generated test keypair
|
||||
- All endpoints tested: activate, refresh, deactivate, usage, flag, admin CRUD
|
||||
- Seat limit enforcement, expiry, revocation all unit tested
|
||||
|
||||
**Peregrine client:**
|
||||
- `verify_local()` tested with pre-signed test JWT using test keypair
|
||||
- `activate()` / `refresh()` tested with `httpx` mocks
|
||||
- `effective_tier()` tested across all states: valid, expired, grace, revoked, missing
|
||||
|
||||
**Integration smoke test:**
|
||||
```bash
|
||||
docker compose up -d
|
||||
# create test key via admin API
|
||||
# call /v1/activate with test key
|
||||
# verify JWT signature with public key
|
||||
# verify /v1/refresh extends expiry
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Decisions Log
|
||||
|
||||
| Decision | Rationale |
|
||||
|----------|-----------|
|
||||
| RS256 over HS256 | Public key embeddable in client; private key never leaves server |
|
||||
| SQLite over Postgres | Matches Peregrine's SQLite-first philosophy; trivially backupable |
|
||||
| 30-day JWT lifetime | Standard SaaS pattern; invisible to users in normal operation |
|
||||
| 7-day grace period | Covers travel, network outages, server maintenance |
|
||||
| Flags v1: capture only | No volume to justify review UI yet; add in v2 |
|
||||
| No payment integration | Manual issuance until customer volume justifies automation |
|
||||
| Multi-product schema | Adding a column now vs migrating a live DB later |
|
||||
| Separate repo | License server is infrastructure, not part of Peregrine's BSL scope |
|
||||
2197
docs/plans/2026-02-25-circuitforge-license-plan.md
Normal file
2197
docs/plans/2026-02-25-circuitforge-license-plan.md
Normal file
File diff suppressed because it is too large
Load diff
1026
scrapers/companyScraper.py
Executable file
1026
scrapers/companyScraper.py
Executable file
File diff suppressed because it is too large
Load diff
|
|
@ -32,7 +32,12 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
|||
# ── Config ────────────────────────────────────────────────────────────────────
|
||||
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
||||
|
||||
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
# DOCS_DIR env var overrides user_profile when running inside Docker
|
||||
_docs_env = os.environ.get("DOCS_DIR", "")
|
||||
_docs = Path(_docs_env) if _docs_env else (
|
||||
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
)
|
||||
|
||||
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
||||
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
||||
GGUF_DIR = _docs / "training_data" / "gguf"
|
||||
|
|
@ -66,7 +71,7 @@ print(f"{'='*60}\n")
|
|||
# ── Load dataset ──────────────────────────────────────────────────────────────
|
||||
if not LETTERS_JSONL.exists():
|
||||
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
|
||||
"Run: conda run -n job-seeker python scripts/prepare_training_data.py")
|
||||
"Run: make prepare-training (or: python scripts/prepare_training_data.py)")
|
||||
|
||||
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
|
||||
print(f"Loaded {len(records)} training examples.")
|
||||
|
|
@ -222,35 +227,102 @@ if not args.no_gguf and USE_UNSLOTH:
|
|||
else:
|
||||
gguf_path = None
|
||||
|
||||
# ── Print next steps ──────────────────────────────────────────────────────────
|
||||
print(f"\n{'='*60}")
|
||||
print(" DONE — next steps to load into Ollama:")
|
||||
print(f"{'='*60}")
|
||||
# ── Register with Ollama (auto) ────────────────────────────────────────────────
|
||||
|
||||
def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy GGUF into the shared Ollama models volume and register via the API.

    Works in two modes:
      Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
        translate the container path into Ollama's view of the file.
      Local — gguf_path is an absolute path Ollama can read directly.

    Returns True on success, False when registration fails (a manual
    `ollama create` fallback command is printed in the failure paths).
    """
    import json as _json  # hoisted: previously re-imported on every stream line
    import shutil

    import requests

    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")

    # ── Place GGUF where Ollama can read it ───────────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        if dest != gguf_path:
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        # Ollama sees the volume at its own mount point, not ours.
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())

    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        f"PARAMETER temperature 0.7\n"
        f"PARAMETER top_p 0.9\n"
        f"PARAMETER num_ctx 32768\n"
    )

    # Write Modelfile to disk as a reference (useful for debugging)
    (OUTPUT_DIR / "Modelfile").write_text(modelfile_text)

    # ── Create via Ollama API ─────────────────────────────────────────────────
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url} …")
    try:
        r = requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        )
        # Ollama streams NDJSON progress lines; an "error" key can arrive even
        # under HTTP 200, so track it and treat it as a failure.
        stream_error = None
        for line in r.iter_lines():
            if not line:
                continue
            try:
                payload = _json.loads(line)
            except Exception:
                print(f"  {line.decode()}")
                continue
            if payload.get("error"):
                stream_error = payload["error"]
                print(f"  ERROR: {stream_error}")
            elif payload.get("status"):
                print(f"  {payload['status']}")
        if r.status_code != 200 or stream_error:
            print(f"  WARNING: Ollama returned HTTP {r.status_code}")
            print(f"  Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
            return False
    except Exception as exc:
        print(f"  Ollama registration failed: {exc}")
        print(f"  Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
        return False

    # ── Update config/llm.yaml ────────────────────────────────────────────────
    # Best-effort: point the ollama backend at the freshly registered model.
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                llm_yaml.write_text(
                    _yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
                )
                print(f"  llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            # Non-fatal: registration succeeded even if the config write didn't.
            print(f"  Could not update llm.yaml automatically: {exc}")

    print(f"\n{'='*60}")
    print(f" Model ready: {model_name}:latest")
    print(f" Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
    print(f"{'='*60}\n")
    return True
|
||||
|
||||
|
||||
if gguf_path and gguf_path.exists():
|
||||
modelfile = OUTPUT_DIR / "Modelfile"
|
||||
modelfile.write_text(f"""FROM {gguf_path}
|
||||
SYSTEM \"\"\"
|
||||
{SYSTEM_PROMPT}
|
||||
\"\"\"
|
||||
PARAMETER temperature 0.7
|
||||
PARAMETER top_p 0.9
|
||||
PARAMETER num_ctx 32768
|
||||
""")
|
||||
print(f"\n1. Modelfile written to: {modelfile}")
|
||||
print(f"\n2. Create the Ollama model:")
|
||||
print(f" ollama create {OLLAMA_NAME} -f {modelfile}")
|
||||
print(f"\n3. Test it:")
|
||||
print(f" ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
|
||||
print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
|
||||
print(f" then pick it in Settings → LLM Backends → Ollama → Model.")
|
||||
_auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
|
||||
else:
|
||||
print(f"\n Adapter only (no GGUF). To convert manually:")
|
||||
print(f" 1. Merge adapter:")
|
||||
print(f" conda run -n ogma python -c \"")
|
||||
print(f" from peft import AutoPeftModelForCausalLM")
|
||||
print(f" m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
|
||||
print(f" m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
|
||||
print(f" 2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
|
||||
print(f" 3. ollama create {OLLAMA_NAME} -f Modelfile")
|
||||
print()
|
||||
print(f"\n{'='*60}")
|
||||
print(" Adapter saved (no GGUF produced).")
|
||||
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
|
||||
print(f" Adapter path: {adapter_path}")
|
||||
print(f"{'='*60}\n")
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ Usage:
|
|||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
|
@ -22,7 +23,10 @@ from scripts.user_profile import UserProfile
|
|||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
|
||||
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
_docs_env = os.environ.get("DOCS_DIR", "")
|
||||
_docs = Path(_docs_env) if _docs_env else (
|
||||
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
)
|
||||
LETTERS_DIR = _docs
|
||||
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
||||
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
||||
|
|
@ -77,6 +81,16 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
|
|||
if p not in seen:
|
||||
seen.add(p)
|
||||
all_paths.append(p)
|
||||
|
||||
# Also scan web-uploaded files (Settings → Fine-tune → Upload)
|
||||
uploads_dir = letters_dir / "training_data" / "uploads"
|
||||
if uploads_dir.exists():
|
||||
for glob in ("*.md", "*.txt"):
|
||||
for p in uploads_dir.glob(glob):
|
||||
if p not in seen:
|
||||
seen.add(p)
|
||||
all_paths.append(p)
|
||||
|
||||
for path in sorted(all_paths):
|
||||
text = path.read_text(encoding="utf-8", errors="ignore").strip()
|
||||
if not text or len(text) < 100:
|
||||
|
|
|
|||
|
|
@ -243,6 +243,17 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
|||
)
|
||||
return
|
||||
|
||||
elif task_type == "prepare_training":
|
||||
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
|
||||
records = build_records()
|
||||
write_jsonl(records, DEFAULT_OUTPUT)
|
||||
n = len(records)
|
||||
update_task_status(
|
||||
db_path, task_id, "completed",
|
||||
error=f"{n} training pair{'s' if n != 1 else ''} extracted",
|
||||
)
|
||||
return
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown task_type: {task_type!r}")
|
||||
|
||||
|
|
|
|||
137
setup.sh
137
setup.sh
|
|
@ -64,6 +64,35 @@ install_git() {
|
|||
success "git installed."
|
||||
}
|
||||
|
||||
# ── Podman detection ───────────────────────────────────────────────────────────
|
||||
# If Podman is already present, skip Docker entirely and ensure podman-compose is available.
|
||||
check_podman() {
    # Detect an existing Podman install. If present, Docker is skipped entirely
    # and a compose provider (`podman compose` plugin or podman-compose) is
    # ensured. Returns 0 when Podman is usable, 1 when Podman is absent.
    if ! cmd_exists podman; then return 1; fi
    success "Podman detected ($(podman --version)) — skipping Docker install."
    # Ensure a compose provider is available
    # (note: `&>/dev/null 2>&1` was redundant; `>/dev/null 2>&1` covers both streams)
    if podman compose version >/dev/null 2>&1; then
        success "podman compose available."
    elif cmd_exists podman-compose; then
        success "podman-compose available."
    else
        info "Installing podman-compose…"
        # Prefer the distro package; fall back to a user-level pip install.
        case "$DISTRO_FAMILY" in
            debian) $SUDO apt-get install -y podman-compose 2>/dev/null \
                        || pip3 install --user podman-compose ;;
            fedora) $SUDO dnf install -y podman-compose 2>/dev/null \
                        || pip3 install --user podman-compose ;;
            arch)   $SUDO pacman -Sy --noconfirm podman-compose 2>/dev/null \
                        || pip3 install --user podman-compose ;;
            macos)  brew install podman-compose 2>/dev/null \
                        || pip3 install --user podman-compose ;;
        esac
        success "podman-compose installed."
    fi
    # Podman needs CDI device specs for GPU passthrough — warn up front.
    warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
    warn "  sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
    return 0
}
|
||||
|
||||
# ── Docker ─────────────────────────────────────────────────────────────────────
|
||||
install_docker_linux_debian() {
|
||||
$SUDO apt-get update -q
|
||||
|
|
@ -139,6 +168,27 @@ check_compose() {
|
|||
fi
|
||||
}
|
||||
|
||||
# ── Docker daemon health check ──────────────────────────────────────────────────
|
||||
check_docker_running() {
    # Verify the Docker daemon responds. On Linux, attempt to start it via
    # systemd; on macOS, tell the user to launch Docker Desktop manually.
    # Always returns success — a stopped daemon only produces warnings here.
    # (note: `&>/dev/null 2>&1` was redundant; `>/dev/null 2>&1` covers both streams)
    if docker info >/dev/null 2>&1; then
        success "Docker daemon is running."
        return
    fi
    warn "Docker daemon is not responding."
    if [[ "$OS" == "Linux" ]] && command -v systemctl >/dev/null 2>&1; then
        info "Starting Docker service…"
        $SUDO systemctl start docker 2>/dev/null || true
        sleep 2  # give the daemon a moment to come up before re-probing
        if docker info >/dev/null 2>&1; then
            success "Docker daemon started."
        else
            warn "Docker failed to start. Run: sudo systemctl start docker"
        fi
    elif [[ "$OS" == "Darwin" ]]; then
        warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
    fi
}
|
||||
|
||||
# ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
|
||||
install_nvidia_toolkit() {
|
||||
[[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support
|
||||
|
|
@ -146,8 +196,8 @@ install_nvidia_toolkit() {
|
|||
info "No NVIDIA GPU detected — skipping Container Toolkit."
|
||||
return
|
||||
fi
|
||||
if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
|
||||
success "NVIDIA Container Toolkit already working."
|
||||
if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null 2>&1; then
|
||||
success "NVIDIA Container Toolkit already configured."
|
||||
return
|
||||
fi
|
||||
info "NVIDIA GPU detected. Installing Container Toolkit…"
|
||||
|
|
@ -176,6 +226,8 @@ install_nvidia_toolkit() {
|
|||
}
|
||||
|
||||
# ── Environment setup ──────────────────────────────────────────────────────────
|
||||
# Note: Ollama runs as a Docker container — the compose.yml ollama service
|
||||
# handles model download automatically on first start (see docker/ollama/entrypoint.sh).
|
||||
setup_env() {
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
|
||||
|
|
@ -186,29 +238,88 @@ setup_env() {
|
|||
fi
|
||||
}
|
||||
|
||||
# ── Model weights storage ───────────────────────────────────────────────────────
|
||||
_update_env_key() {
    # Portable in-place key=value update for .env files (Linux + macOS).
    # Rewrites an existing `key=` line, or appends the pair when absent.
    local env_file="$1" env_key="$2" env_val="$3"
    awk -v key="$env_key" -v val="$env_val" '
        BEGIN { seen = 0 }
        $0 ~ ("^" key "=") { print key "=" val; seen = 1; next }
        { print }
        END { if (!seen) print key "=" val }
    ' "$env_file" > "${env_file}.tmp" && mv "${env_file}.tmp" "$env_file"
}
||||
|
||||
# Prompt for a single models directory, create it, and persist it to .env.
# $1 = .env path  $2 = env key  $3 = prompt label  $4 = default value
_prompt_model_dir() {
    local env_file="$1" key="$2" label="$3" default="$4" current input
    current="$(grep -E "^${key}=" "$env_file" 2>/dev/null | cut -d= -f2-)"
    [[ -z "$current" ]] && current="$default"
    read -rp " ${label} [${current}]: " input || input=""
    input="${input:-$current}"
    input="${input/#\~/$HOME}"   # expand a leading ~ so docker-compose gets a real path
    mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
    _update_env_key "$env_file" "$key" "$input"
    success "${key}=$input"
}

configure_model_paths() {
    # Interactive step: let the user point the large model-weight directories
    # (Ollama + vLLM) at a separate data drive before first start.
    # The two prompts previously duplicated ~8 lines each; now shared via
    # _prompt_model_dir.
    local env_file
    env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"

    # Skip prompts when stdin is not a terminal (e.g. curl | bash)
    if [[ ! -t 0 ]]; then
        info "Non-interactive — using default model paths from .env"
        return
    fi

    echo ""
    info "Model weights storage"
    echo -e " AI models can be 2–30+ GB each. If you have a separate data drive,"
    echo -e " point these at it now. Press Enter to keep the value shown in [brackets]."
    echo ""

    _prompt_model_dir "$env_file" "OLLAMA_MODELS_DIR" "Ollama models dir" "~/models/ollama"
    _prompt_model_dir "$env_file" "VLLM_MODELS_DIR" "vLLM models dir" "~/models/vllm"

    echo ""
}
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||
main() {
|
||||
echo ""
|
||||
echo -e "${BLUE}╔══════════════════════════════════════════╗${NC}"
|
||||
echo -e "${BLUE}║ Peregrine — Dependency Installer ║${NC}"
|
||||
echo -e "${BLUE}║ by Circuit Forge LLC ║${NC}"
|
||||
echo -e "${BLUE}╚══════════════════════════════════════════╝${NC}"
|
||||
echo -e "${BLUE}╔══════════════════════════════════════════════════════╗${NC}"
|
||||
echo -e "${BLUE}║ Peregrine — Dependency Installer ║${NC}"
|
||||
echo -e "${BLUE}║ by Circuit Forge LLC ║${NC}"
|
||||
echo -e "${BLUE}║ \"Don't be evil, for real and forever.\" ║${NC}"
|
||||
echo -e "${BLUE}╚══════════════════════════════════════════════════════╝${NC}"
|
||||
echo ""
|
||||
|
||||
install_git
|
||||
install_docker
|
||||
check_compose
|
||||
install_nvidia_toolkit
|
||||
# Podman takes precedence if already installed; otherwise install Docker
|
||||
if ! check_podman; then
|
||||
install_docker
|
||||
check_docker_running
|
||||
check_compose
|
||||
install_nvidia_toolkit
|
||||
fi
|
||||
setup_env
|
||||
configure_model_paths
|
||||
|
||||
echo ""
|
||||
success "All dependencies installed."
|
||||
echo ""
|
||||
echo -e " ${GREEN}Next steps:${NC}"
|
||||
echo -e " 1. Edit ${YELLOW}.env${NC} to set your preferred ports and model paths"
|
||||
echo -e " 2. Start Peregrine:"
|
||||
echo -e " ${YELLOW}docker compose --profile remote up -d${NC}"
|
||||
echo -e " 3. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
||||
echo -e " 1. Start Peregrine:"
|
||||
echo -e " ${YELLOW}make start${NC} # remote/API-only (no local GPU)"
|
||||
echo -e " ${YELLOW}make start PROFILE=cpu${NC} # local Ollama inference (CPU)"
|
||||
echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
||||
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
|
||||
echo ""
|
||||
if groups "$USER" 2>/dev/null | grep -q docker; then
|
||||
true
|
||||
|
|
|
|||
Loading…
Reference in a new issue