Compare commits
No commits in common. "006738f7b3401576a42a945e251d1b26c5b4878b" and "a7d9bd075ac5578b37298626debc41552f4365e1" have entirely different histories.
006738f7b3
...
a7d9bd075a
20 changed files with 111 additions and 4127 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -22,8 +22,3 @@ config/user.yaml
|
||||||
config/.backup-*
|
config/.backup-*
|
||||||
config/integrations/*.yaml
|
config/integrations/*.yaml
|
||||||
!config/integrations/*.yaml.example
|
!config/integrations/*.yaml.example
|
||||||
|
|
||||||
# companyScraper runtime artifacts
|
|
||||||
scrapers/.cache/
|
|
||||||
scrapers/.debug/
|
|
||||||
scrapers/raw_scrapes/
|
|
||||||
|
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
# Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
|
|
||||||
# Large image (~12-15 GB after build). Built once, cached on rebuilds.
|
|
||||||
# GPU strongly recommended. CPU fallback works but training is very slow.
|
|
||||||
#
|
|
||||||
# Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
|
|
||||||
# If your GPU requires a different CUDA version, change the FROM line and
|
|
||||||
# reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121).
|
|
||||||
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Build tools needed by bitsandbytes CUDA kernels and unsloth
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
gcc g++ git libgomp1 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install training stack.
|
|
||||||
# unsloth detects CUDA version automatically from the base image.
|
|
||||||
RUN pip install --no-cache-dir \
|
|
||||||
"unsloth @ git+https://github.com/unslothai/unsloth.git" \
|
|
||||||
"datasets>=2.18" "trl>=0.8" peft transformers \
|
|
||||||
"bitsandbytes>=0.43.0" accelerate sentencepiece \
|
|
||||||
requests pyyaml
|
|
||||||
|
|
||||||
COPY scripts/ /app/scripts/
|
|
||||||
COPY config/ /app/config/
|
|
||||||
|
|
||||||
ENV PYTHONUNBUFFERED=1
|
|
||||||
# Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
|
|
||||||
ENV CUDA_VISIBLE_DEVICES=0
|
|
||||||
|
|
||||||
# Runtime env vars injected by compose.yml:
|
|
||||||
# OLLAMA_URL — Ollama API base (default: http://ollama:11434)
|
|
||||||
# OLLAMA_MODELS_MOUNT — finetune container's mount path for ollama models volume
|
|
||||||
# OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
|
|
||||||
# DOCS_DIR — cover letters + training data root (default: /docs)
|
|
||||||
|
|
||||||
ENTRYPOINT ["python", "scripts/finetune_local.py"]
|
|
||||||
46
Makefile
46
Makefile
|
|
@ -1,66 +1,36 @@
|
||||||
# Makefile — Peregrine convenience targets
|
# Makefile — Peregrine convenience targets
|
||||||
# Usage: make <target>
|
# Usage: make <target>
|
||||||
|
|
||||||
.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
|
.PHONY: setup preflight start stop restart logs test clean help
|
||||||
|
|
||||||
PROFILE ?= remote
|
PROFILE ?= remote
|
||||||
PYTHON ?= python3
|
PYTHON ?= python3
|
||||||
|
|
||||||
# Auto-detect container engine: prefer docker compose, fall back to podman
|
setup: ## Install dependencies (Docker, NVIDIA toolkit)
|
||||||
COMPOSE ?= $(shell \
|
|
||||||
command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
|
|
||||||
&& echo "docker compose" \
|
|
||||||
|| (command -v podman >/dev/null 2>&1 \
|
|
||||||
&& podman compose version >/dev/null 2>&1 \
|
|
||||||
&& echo "podman compose" \
|
|
||||||
|| echo "podman-compose"))
|
|
||||||
|
|
||||||
# GPU profiles require an overlay for NVIDIA device reservations.
|
|
||||||
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
|
|
||||||
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
|
||||||
COMPOSE_FILES := -f compose.yml
|
|
||||||
ifneq (,$(findstring podman,$(COMPOSE)))
|
|
||||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
|
||||||
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
|
||||||
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
|
||||||
@bash setup.sh
|
@bash setup.sh
|
||||||
|
|
||||||
preflight: ## Check ports + system resources; write .env
|
preflight: ## Check ports + system resources; write .env
|
||||||
@$(PYTHON) scripts/preflight.py
|
@$(PYTHON) scripts/preflight.py
|
||||||
|
|
||||||
start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
|
start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
|
||||||
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
|
docker compose --profile $(PROFILE) up -d
|
||||||
|
|
||||||
stop: ## Stop all Peregrine services
|
stop: ## Stop all Peregrine services
|
||||||
$(COMPOSE) down
|
docker compose down
|
||||||
|
|
||||||
restart: preflight ## Preflight check then restart all services
|
restart: preflight ## Preflight check then restart all services
|
||||||
$(COMPOSE) down && $(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) up -d
|
docker compose down && docker compose --profile $(PROFILE) up -d
|
||||||
|
|
||||||
logs: ## Tail app logs
|
logs: ## Tail app logs
|
||||||
$(COMPOSE) logs -f app
|
docker compose logs -f app
|
||||||
|
|
||||||
test: ## Run the test suite
|
test: ## Run the test suite
|
||||||
@$(PYTHON) -m pytest tests/ -v
|
$(PYTHON) -m pytest tests/ -v
|
||||||
|
|
||||||
prepare-training: ## Scan docs_dir for cover letters and build training JSONL
|
|
||||||
$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
|
|
||||||
|
|
||||||
finetune: ## Fine-tune your personal cover letter model (run prepare-training first)
|
|
||||||
@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
|
|
||||||
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
|
|
||||||
|
|
||||||
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
||||||
@echo "WARNING: This will delete all Peregrine containers and data."
|
@echo "WARNING: This will delete all Peregrine containers and data."
|
||||||
@read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ]
|
@read -p "Type 'yes' to confirm: " confirm && [ "$$confirm" = "yes" ]
|
||||||
$(COMPOSE) down --rmi local --volumes
|
docker compose down --rmi local --volumes
|
||||||
|
|
||||||
help: ## Show this help
|
help: ## Show this help
|
||||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
|
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
|
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
|
||||||
|
|
||||||
> *"Don't be evil, for real and forever."*
|
|
||||||
|
|
||||||
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
|
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
|
||||||
Privacy-first, local-first. Your data never leaves your machine.
|
Privacy-first, local-first. Your data never leaves your machine.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -403,9 +403,9 @@ elif step == 5:
|
||||||
st.caption("Change only if services run on non-default ports or remote hosts.")
|
st.caption("Change only if services run on non-default ports or remote hosts.")
|
||||||
svc = dict(saved_yaml.get("services", {}))
|
svc = dict(saved_yaml.get("services", {}))
|
||||||
for svc_name, default_host, default_port in [
|
for svc_name, default_host, default_port in [
|
||||||
("ollama", "ollama", 11434), # Docker service name
|
("ollama", "localhost", 11434),
|
||||||
("vllm", "vllm", 8000), # Docker service name
|
("vllm", "localhost", 8000),
|
||||||
("searxng", "searxng", 8080), # Docker internal port (host-mapped: 8888)
|
("searxng", "localhost", 8888),
|
||||||
]:
|
]:
|
||||||
c1, c2 = st.columns([3, 1])
|
c1, c2 = st.columns([3, 1])
|
||||||
svc[f"{svc_name}_host"] = c1.text_input(
|
svc[f"{svc_name}_host"] = c1.text_input(
|
||||||
|
|
|
||||||
|
|
@ -1026,10 +1026,9 @@ with tab_finetune:
|
||||||
|
|
||||||
if ft_step == 1:
|
if ft_step == 1:
|
||||||
st.markdown("**Step 1: Upload Cover Letters**")
|
st.markdown("**Step 1: Upload Cover Letters**")
|
||||||
st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
|
|
||||||
uploaded = st.file_uploader(
|
uploaded = st.file_uploader(
|
||||||
"Upload cover letters (.md or .txt)",
|
"Upload cover letters (PDF, DOCX, or TXT)",
|
||||||
type=["md", "txt"],
|
type=["pdf", "docx", "txt"],
|
||||||
accept_multiple_files=True,
|
accept_multiple_files=True,
|
||||||
)
|
)
|
||||||
if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
|
if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
|
||||||
|
|
@ -1041,45 +1040,18 @@ with tab_finetune:
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
elif ft_step == 2:
|
elif ft_step == 2:
|
||||||
st.markdown("**Step 2: Extract Training Pairs**")
|
st.markdown("**Step 2: Preview Training Pairs**")
|
||||||
import json as _json
|
st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
|
||||||
import sqlite3 as _sqlite3
|
|
||||||
from scripts.db import DEFAULT_DB as _FT_DB
|
|
||||||
|
|
||||||
jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
|
jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
|
||||||
|
|
||||||
# Show task status
|
|
||||||
_ft_conn = _sqlite3.connect(_FT_DB)
|
|
||||||
_ft_conn.row_factory = _sqlite3.Row
|
|
||||||
_ft_task = _ft_conn.execute(
|
|
||||||
"SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
|
|
||||||
).fetchone()
|
|
||||||
_ft_conn.close()
|
|
||||||
|
|
||||||
if _ft_task:
|
|
||||||
_ft_status = _ft_task["status"]
|
|
||||||
if _ft_status == "completed":
|
|
||||||
st.success(f"✅ {_ft_task['error'] or 'Extraction complete'}")
|
|
||||||
elif _ft_status in ("running", "queued"):
|
|
||||||
st.info(f"⏳ {_ft_status.capitalize()}… refresh to check progress.")
|
|
||||||
elif _ft_status == "failed":
|
|
||||||
st.error(f"Extraction failed: {_ft_task['error']}")
|
|
||||||
|
|
||||||
if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
|
|
||||||
from scripts.task_runner import submit_task as _ft_submit
|
|
||||||
_ft_submit(_FT_DB, "prepare_training", 0)
|
|
||||||
st.info("Extracting in the background — refresh in a moment.")
|
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
if jsonl_path.exists():
|
if jsonl_path.exists():
|
||||||
|
import json as _json
|
||||||
pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
|
pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
|
||||||
st.caption(f"{len(pairs)} training pairs ready.")
|
st.caption(f"{len(pairs)} training pairs extracted.")
|
||||||
for i, p in enumerate(pairs[:3]):
|
for i, p in enumerate(pairs[:3]):
|
||||||
with st.expander(f"Pair {i+1}"):
|
with st.expander(f"Pair {i+1}"):
|
||||||
st.text(p.get("output", p.get("input", ""))[:300])
|
st.text(p.get("input", "")[:300])
|
||||||
else:
|
else:
|
||||||
st.caption("No training pairs yet — click Extract above.")
|
st.warning("No training pairs found. Run `prepare_training_data.py` first.")
|
||||||
|
|
||||||
col_back, col_next = st.columns([1, 4])
|
col_back, col_next = st.columns([1, 4])
|
||||||
if col_back.button("← Back", key="ft_back2"):
|
if col_back.button("← Back", key="ft_back2"):
|
||||||
st.session_state.ft_step = 1
|
st.session_state.ft_step = 1
|
||||||
|
|
@ -1089,45 +1061,13 @@ with tab_finetune:
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
elif ft_step == 3:
|
elif ft_step == 3:
|
||||||
st.markdown("**Step 3: Fine-Tune**")
|
st.markdown("**Step 3: Train**")
|
||||||
|
st.slider("Epochs", 3, 20, 10, key="ft_epochs")
|
||||||
_ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower()
|
if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
|
||||||
if _profile else "cover")
|
st.info("Fine-tune queued as a background task. Check back in 30–60 minutes.")
|
||||||
_ft_model_name = f"{_ft_profile_name}-cover-writer"
|
if st.button("← Back", key="ft_back3"):
|
||||||
|
|
||||||
st.info(
|
|
||||||
"Run the command below from your terminal. Training takes 30–90 min on GPU "
|
|
||||||
"and registers the model automatically when complete."
|
|
||||||
)
|
|
||||||
st.code("make finetune PROFILE=single-gpu", language="bash")
|
|
||||||
st.caption(
|
|
||||||
f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
|
|
||||||
"Cover letter generation will use it automatically."
|
|
||||||
)
|
|
||||||
|
|
||||||
st.markdown("**Model status:**")
|
|
||||||
try:
|
|
||||||
import os as _os
|
|
||||||
import requests as _ft_req
|
|
||||||
_ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
|
||||||
_tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
|
|
||||||
if _tags.status_code == 200:
|
|
||||||
_model_names = [m["name"] for m in _tags.json().get("models", [])]
|
|
||||||
if any(_ft_model_name in m for m in _model_names):
|
|
||||||
st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
|
|
||||||
else:
|
|
||||||
st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
|
|
||||||
else:
|
|
||||||
st.caption("Ollama returned an unexpected response.")
|
|
||||||
except Exception:
|
|
||||||
st.caption("Could not reach Ollama — ensure services are running with `make start`.")
|
|
||||||
|
|
||||||
col_back, col_refresh = st.columns([1, 3])
|
|
||||||
if col_back.button("← Back", key="ft_back3"):
|
|
||||||
st.session_state.ft_step = 2
|
st.session_state.ft_step = 2
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
|
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
# ── Developer tab ─────────────────────────────────────────────────────────────
|
# ── Developer tab ─────────────────────────────────────────────────────────────
|
||||||
if _show_dev_tab:
|
if _show_dev_tab:
|
||||||
|
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
# compose.gpu.yml — Docker NVIDIA GPU overlay
|
|
||||||
#
|
|
||||||
# Adds NVIDIA GPU reservations to Peregrine services.
|
|
||||||
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
|
|
||||||
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
|
|
||||||
#
|
|
||||||
# Prerequisites:
|
|
||||||
# sudo nvidia-ctk runtime configure --runtime=docker
|
|
||||||
# sudo systemctl restart docker
|
|
||||||
#
|
|
||||||
services:
|
|
||||||
ollama:
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["0"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
|
|
||||||
vision:
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["0"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
|
|
||||||
vllm:
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["1"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
|
|
||||||
finetune:
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["0"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
# compose.podman-gpu.yml — Podman GPU override
|
|
||||||
#
|
|
||||||
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
|
|
||||||
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
|
|
||||||
# when podman/podman-compose is detected, or manually:
|
|
||||||
# podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
|
|
||||||
#
|
|
||||||
# Prerequisites:
|
|
||||||
# sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
|
||||||
# (requires nvidia-container-toolkit >= 1.14)
|
|
||||||
#
|
|
||||||
services:
|
|
||||||
ollama:
|
|
||||||
devices:
|
|
||||||
- nvidia.com/gpu=0
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices: []
|
|
||||||
|
|
||||||
vision:
|
|
||||||
devices:
|
|
||||||
- nvidia.com/gpu=0
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices: []
|
|
||||||
|
|
||||||
vllm:
|
|
||||||
devices:
|
|
||||||
- nvidia.com/gpu=1
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices: []
|
|
||||||
|
|
||||||
finetune:
|
|
||||||
devices:
|
|
||||||
- nvidia.com/gpu=0
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices: []
|
|
||||||
46
compose.yml
46
compose.yml
|
|
@ -12,7 +12,6 @@ services:
|
||||||
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||||
environment:
|
environment:
|
||||||
- STAGING_DB=/app/data/staging.db
|
- STAGING_DB=/app/data/staging.db
|
||||||
- DOCS_DIR=/docs
|
|
||||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||||
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
||||||
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||||
|
|
@ -48,6 +47,18 @@ services:
|
||||||
profiles: [cpu, single-gpu, dual-gpu]
|
profiles: [cpu, single-gpu, dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
ollama-gpu:
|
||||||
|
extends:
|
||||||
|
service: ollama
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["0"]
|
||||||
|
capabilities: [gpu]
|
||||||
|
profiles: [single-gpu, dual-gpu]
|
||||||
|
|
||||||
vision:
|
vision:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
|
|
@ -57,6 +68,13 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
|
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
|
||||||
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
|
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["0"]
|
||||||
|
capabilities: [gpu]
|
||||||
profiles: [single-gpu, dual-gpu]
|
profiles: [single-gpu, dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
|
@ -74,24 +92,12 @@ services:
|
||||||
--enforce-eager
|
--enforce-eager
|
||||||
--max-num-seqs 8
|
--max-num-seqs 8
|
||||||
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["1"]
|
||||||
|
capabilities: [gpu]
|
||||||
profiles: [dual-gpu]
|
profiles: [dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
finetune:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile.finetune
|
|
||||||
volumes:
|
|
||||||
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
|
||||||
- ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
|
|
||||||
- ./config:/app/config
|
|
||||||
environment:
|
|
||||||
- DOCS_DIR=/docs
|
|
||||||
- OLLAMA_URL=http://ollama:11434
|
|
||||||
- OLLAMA_MODELS_MOUNT=/ollama-models
|
|
||||||
- OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
|
|
||||||
depends_on:
|
|
||||||
ollama:
|
|
||||||
condition: service_started
|
|
||||||
profiles: [cpu, single-gpu, dual-gpu]
|
|
||||||
restart: "no"
|
|
||||||
|
|
|
||||||
|
|
@ -21,26 +21,26 @@ backends:
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
ollama:
|
ollama:
|
||||||
api_key: ollama
|
api_key: ollama
|
||||||
base_url: http://ollama:11434/v1
|
base_url: http://localhost:11434/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: llama3.2:3b
|
model: meghan-cover-writer:latest
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
ollama_research:
|
ollama_research:
|
||||||
api_key: ollama
|
api_key: ollama
|
||||||
base_url: http://ollama:11434/v1
|
base_url: http://localhost:11434/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: llama3.2:3b
|
model: llama3.1:8b
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
vision_service:
|
vision_service:
|
||||||
base_url: http://vision:8002
|
base_url: http://localhost:8002
|
||||||
enabled: true
|
enabled: true
|
||||||
supports_images: true
|
supports_images: true
|
||||||
type: vision_service
|
type: vision_service
|
||||||
vllm:
|
vllm:
|
||||||
api_key: ''
|
api_key: ''
|
||||||
base_url: http://vllm:8000/v1
|
base_url: http://localhost:8000/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: __auto__
|
model: __auto__
|
||||||
supports_images: false
|
supports_images: false
|
||||||
|
|
|
||||||
|
|
@ -21,21 +21,21 @@ backends:
|
||||||
supports_images: false
|
supports_images: false
|
||||||
ollama:
|
ollama:
|
||||||
api_key: ollama
|
api_key: ollama
|
||||||
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker
|
base_url: http://localhost:11434/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: llama3.2:3b
|
model: meghan-cover-writer:latest
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
supports_images: false
|
supports_images: false
|
||||||
ollama_research:
|
ollama_research:
|
||||||
api_key: ollama
|
api_key: ollama
|
||||||
base_url: http://ollama:11434/v1 # Docker service name; use localhost:11434 outside Docker
|
base_url: http://localhost:11434/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: llama3.2:3b
|
model: llama3.1:8b
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
supports_images: false
|
supports_images: false
|
||||||
vllm:
|
vllm:
|
||||||
api_key: ''
|
api_key: ''
|
||||||
base_url: http://vllm:8000/v1 # Docker service name; use localhost:8000 outside Docker
|
base_url: http://localhost:8000/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: __auto__
|
model: __auto__
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
|
|
|
||||||
|
|
@ -44,15 +44,15 @@ inference_profile: "remote" # remote | cpu | single-gpu | dual-gpu
|
||||||
|
|
||||||
services:
|
services:
|
||||||
streamlit_port: 8501
|
streamlit_port: 8501
|
||||||
ollama_host: ollama # Docker service name; use "localhost" if running outside Docker
|
ollama_host: localhost
|
||||||
ollama_port: 11434
|
ollama_port: 11434
|
||||||
ollama_ssl: false
|
ollama_ssl: false
|
||||||
ollama_ssl_verify: true
|
ollama_ssl_verify: true
|
||||||
vllm_host: vllm # Docker service name; use "localhost" if running outside Docker
|
vllm_host: localhost
|
||||||
vllm_port: 8000
|
vllm_port: 8000
|
||||||
vllm_ssl: false
|
vllm_ssl: false
|
||||||
vllm_ssl_verify: true
|
vllm_ssl_verify: true
|
||||||
searxng_host: searxng # Docker service name; use "localhost" if running outside Docker
|
searxng_host: localhost
|
||||||
searxng_port: 8080 # internal Docker port; use 8888 for host-mapped access
|
searxng_port: 8888
|
||||||
searxng_ssl: false
|
searxng_ssl: false
|
||||||
searxng_ssl_verify: true
|
searxng_ssl_verify: true
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ Unscheduled ideas and deferred features. Roughly grouped by area.
|
||||||
|
|
||||||
## Container Runtime
|
## Container Runtime
|
||||||
|
|
||||||
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
|
- **Podman support** — Update `Makefile` to auto-detect `docker compose` vs `podman-compose` (e.g. `COMPOSE ?= $(shell command -v docker >/dev/null 2>&1 && echo "docker compose" || echo "podman-compose")`; stdout must be discarded as well, because `command -v` prints the resolved path and `$(shell …)` would capture it). Note in README that rootless Podman requires a CDI GPU device spec (`nvidia.com/gpu=all`) instead of `runtime: nvidia` in `compose.yml`.
|
||||||
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
|
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -1,367 +0,0 @@
|
||||||
# CircuitForge License Server — Design Document
|
|
||||||
|
|
||||||
**Date:** 2026-02-25
|
|
||||||
**Status:** Approved — ready for implementation
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Goal
|
|
||||||
|
|
||||||
Build a self-hosted licensing server for Circuit Forge LLC products. v1 serves Peregrine; schema is multi-product from day one. Enforces free / paid / premium / ultra tier gates with offline-capable JWT validation, 30-day refresh cycle, 7-day grace period, seat tracking, usage telemetry, and a content violation flagging foundation.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────┐
|
|
||||||
│ circuitforge-license (Heimdall:8600) │
|
|
||||||
│ FastAPI + SQLite + RS256 JWT │
|
|
||||||
│ │
|
|
||||||
│ Public API (/v1/…): │
|
|
||||||
│ POST /v1/activate → issue JWT │
|
|
||||||
│ POST /v1/refresh → renew JWT │
|
|
||||||
│ POST /v1/deactivate → free a seat │
|
|
||||||
│ POST /v1/usage → record usage event │
|
|
||||||
│ POST /v1/flag → report violation │
|
|
||||||
│ │
|
|
||||||
│ Admin API (/admin/…, bearer token): │
|
|
||||||
│ POST/GET /admin/keys → CRUD keys │
|
|
||||||
│ DELETE /admin/keys/{id} → revoke │
|
|
||||||
│ GET /admin/activations → audit │
|
|
||||||
│ GET /admin/usage → telemetry │
|
|
||||||
│ GET/PATCH /admin/flags → flag review │
|
|
||||||
└─────────────────────────────────────────────────┘
|
|
||||||
↑ HTTPS via Caddy (license.circuitforge.com)
|
|
||||||
|
|
||||||
┌─────────────────────────────────────────────────┐
|
|
||||||
│ Peregrine (user's machine) │
|
|
||||||
│ scripts/license.py │
|
|
||||||
│ │
|
|
||||||
│ activate(key) → POST /v1/activate │
|
|
||||||
│ writes config/license.json │
|
|
||||||
│ verify_local() → validates JWT offline │
|
|
||||||
│ using embedded public key │
|
|
||||||
│ refresh_if_needed() → called on app startup │
|
|
||||||
│ effective_tier() → tier string for can_use() │
|
|
||||||
│ report_usage(…) → fire-and-forget telemetry │
|
|
||||||
│ report_flag(…) → fire-and-forget violation │
|
|
||||||
└─────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
**Key properties:**
|
|
||||||
- Peregrine verifies tier **offline** on every check — RS256 public key embedded at build time
|
|
||||||
- Network required only at activation and 30-day refresh
|
|
||||||
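- Revoked keys stop working once the current JWT lapses: the server refuses the next refresh (≤30 days away), and the client then honours the 7-day grace period before dropping to the free tier (lag acceptable for v1)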
|
|
||||||
- `config/license.json` gitignored; missing = free tier
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Crypto: RS256 (asymmetric JWT)
|
|
||||||
|
|
||||||
- **Private key** — lives only on the license server (`keys/private.pem`, gitignored)
|
|
||||||
- **Public key** — committed to both the license server repo and Peregrine (`scripts/license_public_key.pem`)
|
|
||||||
- Peregrine can verify JWT authenticity without ever knowing the private key
|
|
||||||
- A stolen JWT cannot be forged without the private key
|
|
||||||
- Revocation: the server refuses to refresh a revoked key; the existing JWT remains valid until it expires, and once the grace period has also run out the client falls back to the free tier
|
|
||||||
|
|
||||||
**Key generation (one-time, on Heimdall):**
|
|
||||||
```bash
|
|
||||||
openssl genrsa -out keys/private.pem 2048
|
|
||||||
openssl rsa -in keys/private.pem -pubout -out keys/public.pem
|
|
||||||
# copy keys/public.pem → peregrine/scripts/license_public_key.pem
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Database Schema
|
|
||||||
|
|
||||||
```sql
|
|
||||||
CREATE TABLE license_keys (
|
|
||||||
id TEXT PRIMARY KEY, -- UUID
|
|
||||||
key_display TEXT UNIQUE NOT NULL, -- CFG-PRNG-XXXX-XXXX-XXXX
|
|
||||||
product TEXT NOT NULL, -- peregrine | falcon | osprey | …
|
|
||||||
tier TEXT NOT NULL, -- paid | premium | ultra
|
|
||||||
seats INTEGER DEFAULT 1,
|
|
||||||
valid_until TEXT, -- ISO date or NULL (perpetual)
|
|
||||||
revoked INTEGER DEFAULT 0,
|
|
||||||
customer_email TEXT, -- proper field, not buried in notes
|
|
||||||
source TEXT DEFAULT 'manual', -- manual | beta | promo | stripe
|
|
||||||
trial INTEGER DEFAULT 0, -- 1 = time-limited trial key
|
|
||||||
notes TEXT,
|
|
||||||
created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE activations (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
key_id TEXT NOT NULL REFERENCES license_keys(id),
|
|
||||||
machine_id TEXT NOT NULL, -- sha256(hostname + MAC)
|
|
||||||
app_version TEXT, -- Peregrine version at last refresh
|
|
||||||
platform TEXT, -- linux | macos | windows | docker
|
|
||||||
activated_at TEXT NOT NULL,
|
|
||||||
last_refresh TEXT NOT NULL,
|
|
||||||
deactivated_at TEXT -- NULL = still active
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE usage_events (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
key_id TEXT NOT NULL REFERENCES license_keys(id),
|
|
||||||
machine_id TEXT NOT NULL,
|
|
||||||
product TEXT NOT NULL,
|
|
||||||
event_type TEXT NOT NULL, -- cover_letter_generated |
|
|
||||||
-- company_research | email_sync |
|
|
||||||
-- interview_prep | survey | etc.
|
|
||||||
metadata TEXT, -- JSON blob for context
|
|
||||||
created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE flags (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
key_id TEXT NOT NULL REFERENCES license_keys(id),
|
|
||||||
machine_id TEXT,
|
|
||||||
product TEXT NOT NULL,
|
|
||||||
flag_type TEXT NOT NULL, -- content_violation | tos_violation |
|
|
||||||
-- abuse | manual
|
|
||||||
details TEXT, -- JSON: prompt snippet, output excerpt
|
|
||||||
status TEXT DEFAULT 'open', -- open | reviewed | dismissed | actioned
|
|
||||||
created_at TEXT NOT NULL,
|
|
||||||
reviewed_at TEXT,
|
|
||||||
action_taken TEXT -- none | warned | revoked
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE audit_log (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
entity_type TEXT NOT NULL, -- key | activation | flag
|
|
||||||
entity_id TEXT NOT NULL,
|
|
||||||
action TEXT NOT NULL, -- created | revoked | activated |
|
|
||||||
-- deactivated | flag_actioned
|
|
||||||
actor TEXT, -- admin identifier (future multi-admin)
|
|
||||||
details TEXT, -- JSON
|
|
||||||
created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
**Flags scope (v1):** Schema and `POST /v1/flag` endpoint capture data. No admin enforcement UI in v1 — query DB directly. Build review UI in v2 when there's data to act on.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## JWT Payload
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"sub": "CFG-PRNG-A1B2-C3D4-E5F6",
|
|
||||||
"product": "peregrine",
|
|
||||||
"tier": "paid",
|
|
||||||
"seats": 2,
|
|
||||||
"machine": "a3f9c2…",
|
|
||||||
"notice": "Version 1.1 available — see circuitforge.com/update",
|
|
||||||
"iat": 1740000000,
|
|
||||||
"exp": 1742592000
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
`notice` is optional — set via a server config value; included in refresh responses so Peregrine can surface it as a banner. No DB table needed.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Key Format
|
|
||||||
|
|
||||||
`CFG-PRNG-A1B2-C3D4-E5F6`
|
|
||||||
|
|
||||||
- `CFG` — Circuit Forge
|
|
||||||
- `PRNG` / `FLCN` / `OSPY` / … — 4-char product code
|
|
||||||
- Three random 4-char alphanumeric segments
|
|
||||||
- Human-readable, easy to copy/paste into a support email
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Endpoint Reference
|
|
||||||
|
|
||||||
| Method | Path | Auth | Purpose |
|
|
||||||
|--------|------|------|---------|
|
|
||||||
| POST | `/v1/activate` | none | Issue JWT for key + machine |
|
|
||||||
| POST | `/v1/refresh` | JWT bearer | Renew JWT before expiry |
|
|
||||||
| POST | `/v1/deactivate` | JWT bearer | Free a seat |
|
|
||||||
| POST | `/v1/usage` | JWT bearer | Record usage event (fire-and-forget) |
|
|
||||||
| POST | `/v1/flag` | JWT bearer | Report content/ToS violation |
|
|
||||||
| POST | `/admin/keys` | admin token | Create a new key |
|
|
||||||
| GET | `/admin/keys` | admin token | List all keys + activation counts |
|
|
||||||
| DELETE | `/admin/keys/{id}` | admin token | Revoke a key |
|
|
||||||
| GET | `/admin/activations` | admin token | Full activation audit |
|
|
||||||
| GET | `/admin/usage` | admin token | Usage breakdown per key/product/event |
|
|
||||||
| GET | `/admin/flags` | admin token | List flags (open by default) |
|
|
||||||
| PATCH | `/admin/flags/{id}` | admin token | Update flag status + action |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Peregrine Client (`scripts/license.py`)
|
|
||||||
|
|
||||||
**Public API:**
|
|
||||||
```python
|
|
||||||
def activate(key: str) -> dict # POST /v1/activate, writes license.json
|
|
||||||
def verify_local() -> dict | None # validates JWT offline; None = free tier
|
|
||||||
def refresh_if_needed() -> None # silent; called on app startup
|
|
||||||
def effective_tier() -> str # "free"|"paid"|"premium"|"ultra"
|
|
||||||
def report_usage(event_type: str, # fire-and-forget; failures silently dropped
|
|
||||||
metadata: dict = {}) -> None
|
|
||||||
def report_flag(flag_type: str, # fire-and-forget
|
|
||||||
details: dict) -> None
|
|
||||||
```
|
|
||||||
|
|
||||||
**`effective_tier()` decision tree:**
|
|
||||||
```
|
|
||||||
license.json missing or unreadable → "free"
|
|
||||||
JWT signature invalid → "free"
|
|
||||||
JWT product != "peregrine" → "free"
|
|
||||||
JWT not expired → tier from payload
|
|
||||||
JWT expired, within grace period → tier from payload + show banner
|
|
||||||
JWT expired, grace period elapsed → "free" + show banner
|
|
||||||
```
|
|
||||||
|
|
||||||
**`config/license.json` (gitignored):**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"jwt": "eyJ…",
|
|
||||||
"key_display": "CFG-PRNG-A1B2-C3D4-E5F6",
|
|
||||||
"tier": "paid",
|
|
||||||
"valid_until": "2026-03-27",
|
|
||||||
"machine_id": "a3f9c2…",
|
|
||||||
"last_refresh": "2026-02-25T12:00:00Z",
|
|
||||||
"grace_until": null
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Integration point in `tiers.py`:**
|
|
||||||
```python
|
|
||||||
def effective_tier(profile) -> str:
|
|
||||||
from scripts.license import effective_tier as _license_tier
|
|
||||||
if profile.dev_tier_override: # dev override still works in dev mode
|
|
||||||
return profile.dev_tier_override
|
|
||||||
return _license_tier()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Settings License tab** (new tab in `app/pages/2_Settings.py`):
|
|
||||||
- Text input: enter license key → calls `activate()` → shows result
|
|
||||||
- If active: tier badge, key display string, expiry date, seat count
|
|
||||||
- Grace period: amber banner with days remaining
|
|
||||||
- "Deactivate this machine" button → `/v1/deactivate`, deletes `license.json`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Deployment
|
|
||||||
|
|
||||||
**Repo:** `git.opensourcesolarpunk.com/pyr0ball/circuitforge-license` (private)
|
|
||||||
|
|
||||||
**Repo layout:**
|
|
||||||
```
|
|
||||||
circuitforge-license/
|
|
||||||
├── app/
|
|
||||||
│ ├── main.py # FastAPI app
|
|
||||||
│ ├── db.py # SQLite helpers, schema init
|
|
||||||
│ ├── models.py # Pydantic models
|
|
||||||
│ ├── crypto.py # RSA sign/verify helpers
|
|
||||||
│ └── routes/
|
|
||||||
│ ├── public.py # /v1/* endpoints
|
|
||||||
│ └── admin.py # /admin/* endpoints
|
|
||||||
├── data/ # SQLite DB (named volume)
|
|
||||||
├── keys/
|
|
||||||
│ ├── private.pem # gitignored
|
|
||||||
│ └── public.pem # committed
|
|
||||||
├── scripts/
|
|
||||||
│ └── issue-key.sh # curl wrapper for key issuance
|
|
||||||
├── tests/
|
|
||||||
├── Dockerfile
|
|
||||||
├── docker-compose.yml
|
|
||||||
├── .env.example
|
|
||||||
└── requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
**`docker-compose.yml` (on Heimdall):**
|
|
||||||
```yaml
|
|
||||||
services:
|
|
||||||
license:
|
|
||||||
build: .
|
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
|
||||||
- "127.0.0.1:8600:8600"
|
|
||||||
volumes:
|
|
||||||
- license_data:/app/data
|
|
||||||
- ./keys:/app/keys:ro
|
|
||||||
env_file: .env
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
license_data:
|
|
||||||
```
|
|
||||||
|
|
||||||
**`.env` (gitignored):**
|
|
||||||
```
|
|
||||||
ADMIN_TOKEN=<long random string>
|
|
||||||
JWT_PRIVATE_KEY_PATH=/app/keys/private.pem
|
|
||||||
JWT_PUBLIC_KEY_PATH=/app/keys/public.pem
|
|
||||||
JWT_EXPIRY_DAYS=30
|
|
||||||
GRACE_PERIOD_DAYS=7
|
|
||||||
```
|
|
||||||
|
|
||||||
**Caddy block (add to Heimdall Caddyfile):**
|
|
||||||
```caddy
|
|
||||||
license.circuitforge.com {
|
|
||||||
reverse_proxy localhost:8600
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Admin Workflow (v1)
|
|
||||||
|
|
||||||
All operations via `curl` or `scripts/issue-key.sh`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Issue a key
|
|
||||||
./scripts/issue-key.sh --product peregrine --tier paid --seats 2 \
|
|
||||||
--email user@example.com --notes "Beta — manual payment 2026-02-25"
|
|
||||||
# → CFG-PRNG-A1B2-C3D4-E5F6 (email to customer)
|
|
||||||
|
|
||||||
# List all keys
|
|
||||||
curl https://license.circuitforge.com/admin/keys \
|
|
||||||
-H "Authorization: Bearer $ADMIN_TOKEN"
|
|
||||||
|
|
||||||
# Revoke a key
|
|
||||||
curl -X DELETE https://license.circuitforge.com/admin/keys/{id} \
|
|
||||||
-H "Authorization: Bearer $ADMIN_TOKEN"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Testing Strategy
|
|
||||||
|
|
||||||
**License server:**
|
|
||||||
- pytest with in-memory SQLite and generated test keypair
|
|
||||||
- All endpoints tested: activate, refresh, deactivate, usage, flag, admin CRUD
|
|
||||||
- Seat limit enforcement, expiry, revocation all unit tested
|
|
||||||
|
|
||||||
**Peregrine client:**
|
|
||||||
- `verify_local()` tested with pre-signed test JWT using test keypair
|
|
||||||
- `activate()` / `refresh_if_needed()` tested with `httpx` mocks
|
|
||||||
- `effective_tier()` tested across all states: valid, expired, grace, revoked, missing
|
|
||||||
|
|
||||||
**Integration smoke test:**
|
|
||||||
```bash
|
|
||||||
docker compose up -d
|
|
||||||
# create test key via admin API
|
|
||||||
# call /v1/activate with test key
|
|
||||||
# verify JWT signature with public key
|
|
||||||
# verify /v1/refresh extends expiry
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Decisions Log
|
|
||||||
|
|
||||||
| Decision | Rationale |
|
|
||||||
|----------|-----------|
|
|
||||||
| RS256 over HS256 | Public key embeddable in client; private key never leaves server |
|
|
||||||
| SQLite over Postgres | Matches Peregrine's SQLite-first philosophy; trivially backupable |
|
|
||||||
| 30-day JWT lifetime | Standard SaaS pattern; invisible to users in normal operation |
|
|
||||||
| 7-day grace period | Covers travel, network outages, server maintenance |
|
|
||||||
| Flags v1: capture only | No volume to justify review UI yet; add in v2 |
|
|
||||||
| No payment integration | Manual issuance until customer volume justifies automation |
|
|
||||||
| Multi-product schema | Adding a product column now is cheaper than migrating a live DB later |
|
|
||||||
| Separate repo | License server is infrastructure, not part of Peregrine's BSL scope |
|
|
||||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -32,12 +32,7 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
# ── Config ────────────────────────────────────────────────────────────────────
|
# ── Config ────────────────────────────────────────────────────────────────────
|
||||||
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
||||||
|
|
||||||
# DOCS_DIR env var overrides user_profile when running inside Docker
|
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
_docs_env = os.environ.get("DOCS_DIR", "")
|
|
||||||
_docs = Path(_docs_env) if _docs_env else (
|
|
||||||
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
|
||||||
)
|
|
||||||
|
|
||||||
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
||||||
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
||||||
GGUF_DIR = _docs / "training_data" / "gguf"
|
GGUF_DIR = _docs / "training_data" / "gguf"
|
||||||
|
|
@ -71,7 +66,7 @@ print(f"{'='*60}\n")
|
||||||
# ── Load dataset ──────────────────────────────────────────────────────────────
|
# ── Load dataset ──────────────────────────────────────────────────────────────
|
||||||
if not LETTERS_JSONL.exists():
|
if not LETTERS_JSONL.exists():
|
||||||
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
|
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
|
||||||
"Run: make prepare-training (or: python scripts/prepare_training_data.py)")
|
"Run: conda run -n job-seeker python scripts/prepare_training_data.py")
|
||||||
|
|
||||||
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
|
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
|
||||||
print(f"Loaded {len(records)} training examples.")
|
print(f"Loaded {len(records)} training examples.")
|
||||||
|
|
@ -227,102 +222,35 @@ if not args.no_gguf and USE_UNSLOTH:
|
||||||
else:
|
else:
|
||||||
gguf_path = None
|
gguf_path = None
|
||||||
|
|
||||||
# ── Register with Ollama (auto) ────────────────────────────────────────────────
|
# ── Print next steps ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
|
|
||||||
"""
|
|
||||||
Copy GGUF into the shared Ollama models volume and register via the API.
|
|
||||||
|
|
||||||
Works in two modes:
|
|
||||||
Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
|
|
||||||
translate the container path into Ollama's view of the file.
|
|
||||||
Local — gguf_path is an absolute path Ollama can read directly.
|
|
||||||
"""
|
|
||||||
import shutil
|
|
||||||
import requests
|
|
||||||
|
|
||||||
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
|
||||||
models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
|
|
||||||
ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")
|
|
||||||
|
|
||||||
# ── Place GGUF where Ollama can read it ───────────────────────────────────
|
|
||||||
if models_mount and ollama_models_dir:
|
|
||||||
# Containerised: write into the shared volume; Ollama reads from its own mount.
|
|
||||||
dest_dir = Path(models_mount) / "custom"
|
|
||||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
dest = dest_dir / gguf_path.name
|
|
||||||
if dest != gguf_path:
|
|
||||||
print(f"Copying GGUF → shared volume: {dest}")
|
|
||||||
shutil.copy2(gguf_path, dest)
|
|
||||||
ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
|
|
||||||
else:
|
|
||||||
# Local: pass the absolute path directly.
|
|
||||||
ollama_gguf = str(gguf_path.resolve())
|
|
||||||
|
|
||||||
modelfile_text = (
|
|
||||||
f"FROM {ollama_gguf}\n"
|
|
||||||
f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
|
|
||||||
f"PARAMETER temperature 0.7\n"
|
|
||||||
f"PARAMETER top_p 0.9\n"
|
|
||||||
f"PARAMETER num_ctx 32768\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Write Modelfile to disk as a reference (useful for debugging)
|
|
||||||
(OUTPUT_DIR / "Modelfile").write_text(modelfile_text)
|
|
||||||
|
|
||||||
# ── Create via Ollama API ─────────────────────────────────────────────────
|
|
||||||
print(f"\nRegistering '{model_name}' with Ollama at {ollama_url} …")
|
|
||||||
try:
|
|
||||||
r = requests.post(
|
|
||||||
f"{ollama_url}/api/create",
|
|
||||||
json={"name": model_name, "modelfile": modelfile_text},
|
|
||||||
timeout=300,
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
for line in r.iter_lines():
|
|
||||||
if line:
|
|
||||||
import json as _json
|
|
||||||
try:
|
|
||||||
msg = _json.loads(line).get("status", "")
|
|
||||||
except Exception:
|
|
||||||
msg = line.decode()
|
|
||||||
if msg:
|
|
||||||
print(f" {msg}")
|
|
||||||
if r.status_code != 200:
|
|
||||||
print(f" WARNING: Ollama returned HTTP {r.status_code}")
|
|
||||||
return False
|
|
||||||
except Exception as exc:
|
|
||||||
print(f" Ollama registration failed: {exc}")
|
|
||||||
print(f" Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# ── Update config/llm.yaml ────────────────────────────────────────────────
|
|
||||||
llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
|
|
||||||
if llm_yaml.exists():
|
|
||||||
try:
|
|
||||||
import yaml as _yaml
|
|
||||||
cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
|
|
||||||
if "backends" in cfg and "ollama" in cfg["backends"]:
|
|
||||||
cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
|
|
||||||
llm_yaml.write_text(
|
|
||||||
_yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
|
|
||||||
)
|
|
||||||
print(f" llm.yaml updated → ollama.model = {model_name}:latest")
|
|
||||||
except Exception as exc:
|
|
||||||
print(f" Could not update llm.yaml automatically: {exc}")
|
|
||||||
|
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'='*60}")
|
||||||
print(f" Model ready: {model_name}:latest")
|
print(" DONE — next steps to load into Ollama:")
|
||||||
print(f" Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
|
print(f"{'='*60}")
|
||||||
print(f"{'='*60}\n")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
if gguf_path and gguf_path.exists():
|
if gguf_path and gguf_path.exists():
|
||||||
_auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
|
modelfile = OUTPUT_DIR / "Modelfile"
|
||||||
|
modelfile.write_text(f"""FROM {gguf_path}
|
||||||
|
SYSTEM \"\"\"
|
||||||
|
{SYSTEM_PROMPT}
|
||||||
|
\"\"\"
|
||||||
|
PARAMETER temperature 0.7
|
||||||
|
PARAMETER top_p 0.9
|
||||||
|
PARAMETER num_ctx 32768
|
||||||
|
""")
|
||||||
|
print(f"\n1. Modelfile written to: {modelfile}")
|
||||||
|
print(f"\n2. Create the Ollama model:")
|
||||||
|
print(f" ollama create {OLLAMA_NAME} -f {modelfile}")
|
||||||
|
print(f"\n3. Test it:")
|
||||||
|
print(f" ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
|
||||||
|
print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
|
||||||
|
print(f" then pick it in Settings → LLM Backends → Ollama → Model.")
|
||||||
else:
|
else:
|
||||||
print(f"\n{'='*60}")
|
print(f"\n Adapter only (no GGUF). To convert manually:")
|
||||||
print(" Adapter saved (no GGUF produced).")
|
print(f" 1. Merge adapter:")
|
||||||
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
|
print(f" conda run -n ogma python -c \"")
|
||||||
print(f" Adapter path: {adapter_path}")
|
print(f" from peft import AutoPeftModelForCausalLM")
|
||||||
print(f"{'='*60}\n")
|
print(f" m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
|
||||||
|
print(f" m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
|
||||||
|
print(f" 2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
|
||||||
|
print(f" 3. ollama create {OLLAMA_NAME} -f Modelfile")
|
||||||
|
print()
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ Usage:
|
||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -23,10 +22,7 @@ from scripts.user_profile import UserProfile
|
||||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
_docs_env = os.environ.get("DOCS_DIR", "")
|
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
_docs = Path(_docs_env) if _docs_env else (
|
|
||||||
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
|
||||||
)
|
|
||||||
LETTERS_DIR = _docs
|
LETTERS_DIR = _docs
|
||||||
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
||||||
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
||||||
|
|
@ -81,16 +77,6 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
|
||||||
if p not in seen:
|
if p not in seen:
|
||||||
seen.add(p)
|
seen.add(p)
|
||||||
all_paths.append(p)
|
all_paths.append(p)
|
||||||
|
|
||||||
# Also scan web-uploaded files (Settings → Fine-tune → Upload)
|
|
||||||
uploads_dir = letters_dir / "training_data" / "uploads"
|
|
||||||
if uploads_dir.exists():
|
|
||||||
for glob in ("*.md", "*.txt"):
|
|
||||||
for p in uploads_dir.glob(glob):
|
|
||||||
if p not in seen:
|
|
||||||
seen.add(p)
|
|
||||||
all_paths.append(p)
|
|
||||||
|
|
||||||
for path in sorted(all_paths):
|
for path in sorted(all_paths):
|
||||||
text = path.read_text(encoding="utf-8", errors="ignore").strip()
|
text = path.read_text(encoding="utf-8", errors="ignore").strip()
|
||||||
if not text or len(text) < 100:
|
if not text or len(text) < 100:
|
||||||
|
|
|
||||||
|
|
@ -243,17 +243,6 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
elif task_type == "prepare_training":
|
|
||||||
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
|
|
||||||
records = build_records()
|
|
||||||
write_jsonl(records, DEFAULT_OUTPUT)
|
|
||||||
n = len(records)
|
|
||||||
update_task_status(
|
|
||||||
db_path, task_id, "completed",
|
|
||||||
error=f"{n} training pair{'s' if n != 1 else ''} extracted",
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown task_type: {task_type!r}")
|
raise ValueError(f"Unknown task_type: {task_type!r}")
|
||||||
|
|
||||||
|
|
|
||||||
127
setup.sh
127
setup.sh
|
|
@ -64,35 +64,6 @@ install_git() {
|
||||||
success "git installed."
|
success "git installed."
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Podman detection ───────────────────────────────────────────────────────────
|
|
||||||
# If Podman is already present, skip Docker entirely and ensure podman-compose is available.
|
|
||||||
check_podman() {
|
|
||||||
if ! cmd_exists podman; then return 1; fi
|
|
||||||
success "Podman detected ($(podman --version)) — skipping Docker install."
|
|
||||||
# Ensure a compose provider is available
|
|
||||||
if podman compose version &>/dev/null 2>&1; then
|
|
||||||
success "podman compose available."
|
|
||||||
elif cmd_exists podman-compose; then
|
|
||||||
success "podman-compose available."
|
|
||||||
else
|
|
||||||
info "Installing podman-compose…"
|
|
||||||
case "$DISTRO_FAMILY" in
|
|
||||||
debian) $SUDO apt-get install -y podman-compose 2>/dev/null \
|
|
||||||
|| pip3 install --user podman-compose ;;
|
|
||||||
fedora) $SUDO dnf install -y podman-compose 2>/dev/null \
|
|
||||||
|| pip3 install --user podman-compose ;;
|
|
||||||
arch) $SUDO pacman -Sy --noconfirm podman-compose 2>/dev/null \
|
|
||||||
|| pip3 install --user podman-compose ;;
|
|
||||||
macos) brew install podman-compose 2>/dev/null \
|
|
||||||
|| pip3 install --user podman-compose ;;
|
|
||||||
esac
|
|
||||||
success "podman-compose installed."
|
|
||||||
fi
|
|
||||||
warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
|
|
||||||
warn " sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Docker ─────────────────────────────────────────────────────────────────────
|
# ── Docker ─────────────────────────────────────────────────────────────────────
|
||||||
install_docker_linux_debian() {
|
install_docker_linux_debian() {
|
||||||
$SUDO apt-get update -q
|
$SUDO apt-get update -q
|
||||||
|
|
@ -168,27 +139,6 @@ check_compose() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Docker daemon health check ──────────────────────────────────────────────────
|
|
||||||
check_docker_running() {
|
|
||||||
if docker info &>/dev/null 2>&1; then
|
|
||||||
success "Docker daemon is running."
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
warn "Docker daemon is not responding."
|
|
||||||
if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
|
|
||||||
info "Starting Docker service…"
|
|
||||||
$SUDO systemctl start docker 2>/dev/null || true
|
|
||||||
sleep 2
|
|
||||||
if docker info &>/dev/null 2>&1; then
|
|
||||||
success "Docker daemon started."
|
|
||||||
else
|
|
||||||
warn "Docker failed to start. Run: sudo systemctl start docker"
|
|
||||||
fi
|
|
||||||
elif [[ "$OS" == "Darwin" ]]; then
|
|
||||||
warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
|
# ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
|
||||||
install_nvidia_toolkit() {
|
install_nvidia_toolkit() {
|
||||||
[[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support
|
[[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support
|
||||||
|
|
@ -196,8 +146,8 @@ install_nvidia_toolkit() {
|
||||||
info "No NVIDIA GPU detected — skipping Container Toolkit."
|
info "No NVIDIA GPU detected — skipping Container Toolkit."
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null 2>&1; then
|
if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
|
||||||
success "NVIDIA Container Toolkit already configured."
|
success "NVIDIA Container Toolkit already working."
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
info "NVIDIA GPU detected. Installing Container Toolkit…"
|
info "NVIDIA GPU detected. Installing Container Toolkit…"
|
||||||
|
|
@ -226,8 +176,6 @@ install_nvidia_toolkit() {
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Environment setup ──────────────────────────────────────────────────────────
|
# ── Environment setup ──────────────────────────────────────────────────────────
|
||||||
# Note: Ollama runs as a Docker container — the compose.yml ollama service
|
|
||||||
# handles model download automatically on first start (see docker/ollama/entrypoint.sh).
|
|
||||||
setup_env() {
|
setup_env() {
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
|
if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
|
||||||
|
|
@ -238,88 +186,29 @@ setup_env() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Model weights storage ───────────────────────────────────────────────────────
|
|
||||||
_update_env_key() {
|
|
||||||
# Portable in-place key=value update for .env files (Linux + macOS).
|
|
||||||
# Appends the key if not already present.
|
|
||||||
local file="$1" key="$2" val="$3"
|
|
||||||
awk -v k="$key" -v v="$val" '
|
|
||||||
BEGIN { found=0 }
|
|
||||||
$0 ~ ("^" k "=") { print k "=" v; found=1; next }
|
|
||||||
{ print }
|
|
||||||
END { if (!found) print k "=" v }
|
|
||||||
' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
|
|
||||||
}
|
|
||||||
|
|
||||||
configure_model_paths() {
|
|
||||||
local env_file
|
|
||||||
env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
|
|
||||||
|
|
||||||
# Skip prompts when stdin is not a terminal (e.g. curl | bash)
|
|
||||||
if [[ ! -t 0 ]]; then
|
|
||||||
info "Non-interactive — using default model paths from .env"
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
info "Model weights storage"
|
|
||||||
echo -e " AI models can be 2–30+ GB each. If you have a separate data drive,"
|
|
||||||
echo -e " point these at it now. Press Enter to keep the value shown in [brackets]."
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
local current input
|
|
||||||
|
|
||||||
current="$(grep -E '^OLLAMA_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
|
|
||||||
[[ -z "$current" ]] && current="~/models/ollama"
|
|
||||||
read -rp " Ollama models dir [${current}]: " input || input=""
|
|
||||||
input="${input:-$current}"
|
|
||||||
input="${input/#\~/$HOME}"
|
|
||||||
mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
|
|
||||||
_update_env_key "$env_file" "OLLAMA_MODELS_DIR" "$input"
|
|
||||||
success "OLLAMA_MODELS_DIR=$input"
|
|
||||||
|
|
||||||
current="$(grep -E '^VLLM_MODELS_DIR=' "$env_file" 2>/dev/null | cut -d= -f2-)"
|
|
||||||
[[ -z "$current" ]] && current="~/models/vllm"
|
|
||||||
read -rp " vLLM models dir [${current}]: " input || input=""
|
|
||||||
input="${input:-$current}"
|
|
||||||
input="${input/#\~/$HOME}"
|
|
||||||
mkdir -p "$input" 2>/dev/null || warn "Could not create $input — ensure it exists before 'make start'"
|
|
||||||
_update_env_key "$env_file" "VLLM_MODELS_DIR" "$input"
|
|
||||||
success "VLLM_MODELS_DIR=$input"
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Main ───────────────────────────────────────────────────────────────────────
|
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||||
main() {
|
main() {
|
||||||
echo ""
|
echo ""
|
||||||
echo -e "${BLUE}╔══════════════════════════════════════════════════════╗${NC}"
|
echo -e "${BLUE}╔══════════════════════════════════════════╗${NC}"
|
||||||
echo -e "${BLUE}║ Peregrine — Dependency Installer ║${NC}"
|
echo -e "${BLUE}║ Peregrine — Dependency Installer ║${NC}"
|
||||||
echo -e "${BLUE}║ by Circuit Forge LLC ║${NC}"
|
echo -e "${BLUE}║ by Circuit Forge LLC ║${NC}"
|
||||||
echo -e "${BLUE}║ \"Don't be evil, for real and forever.\" ║${NC}"
|
echo -e "${BLUE}╚══════════════════════════════════════════╝${NC}"
|
||||||
echo -e "${BLUE}╚══════════════════════════════════════════════════════╝${NC}"
|
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
install_git
|
install_git
|
||||||
# Podman takes precedence if already installed; otherwise install Docker
|
|
||||||
if ! check_podman; then
|
|
||||||
install_docker
|
install_docker
|
||||||
check_docker_running
|
|
||||||
check_compose
|
check_compose
|
||||||
install_nvidia_toolkit
|
install_nvidia_toolkit
|
||||||
fi
|
|
||||||
setup_env
|
setup_env
|
||||||
configure_model_paths
|
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
success "All dependencies installed."
|
success "All dependencies installed."
|
||||||
echo ""
|
echo ""
|
||||||
echo -e " ${GREEN}Next steps:${NC}"
|
echo -e " ${GREEN}Next steps:${NC}"
|
||||||
echo -e " 1. Start Peregrine:"
|
echo -e " 1. Edit ${YELLOW}.env${NC} to set your preferred ports and model paths"
|
||||||
echo -e " ${YELLOW}make start${NC} # remote/API-only (no local GPU)"
|
echo -e " 2. Start Peregrine:"
|
||||||
echo -e " ${YELLOW}make start PROFILE=cpu${NC} # local Ollama inference (CPU)"
|
echo -e " ${YELLOW}docker compose --profile remote up -d${NC}"
|
||||||
echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
echo -e " 3. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
||||||
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
|
|
||||||
echo ""
|
echo ""
|
||||||
if groups "$USER" 2>/dev/null | grep -q docker; then
|
if groups "$USER" 2>/dev/null | grep -q docker; then
|
||||||
true
|
true
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue