From b03add8663d766ba550b792914d6cbd200cdf2a7 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 26 Apr 2026 09:18:55 -0700 Subject: [PATCH 1/9] feat(resume-matcher): tier-aware writing model routing via cf-orch Premium/ultra users with a custom_writing_model in their session are routed to that model as the first cf-orch candidate; all other tiers use the shared Qwen2.5-3B-Instruct base. complete_json() is unchanged since fine-tuned writing models aren't trained for structured output. Adds _request_tier and _request_writing_model ContextVars. Resolution order: USER_WRITING_MODELS env var (Monday path) then Heimdall meta (future path via peregrine#110). --- resume_matcher/apps/backend/app/llm.py | 34 +++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/resume_matcher/apps/backend/app/llm.py b/resume_matcher/apps/backend/app/llm.py index a9dcbd7..13b3cff 100644 --- a/resume_matcher/apps/backend/app/llm.py +++ b/resume_matcher/apps/backend/app/llm.py @@ -59,6 +59,12 @@ MAX_JSON_CONTENT_SIZE = 1024 * 1024 # 1MB # Request-scoped user_id — set once by session_middleware_dep, read inside _allocate_orch_async. # ContextVar is safe for concurrent async requests: each request task gets its own copy. _request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None) +_request_tier: ContextVar[str | None] = ContextVar("request_tier", default=None) +# Custom writing model for premium/ultra users — populated from Heimdall license key meta. +# Set to None for all other tiers; complete() falls back to the shared base model. +_request_writing_model: ContextVar[str | None] = ContextVar("request_writing_model", default=None) + +_PREMIUM_TIERS: frozenset[str] = frozenset({"premium", "ultra"}) def set_request_user_id(user_id: str | None) -> None: @@ -69,6 +75,22 @@ def get_request_user_id() -> str | None: return _request_user_id.get() +def set_request_tier(tier: str | None) -> None: + _request_tier.set(tier) + + +def get_request_tier() -> str | None: + return _request_tier.get() + + +def set_request_writing_model(model: str | None) -> None: + _request_writing_model.set(model) + + +def get_request_writing_model() -> str | None: + return _request_writing_model.get() + + class LLMConfig(BaseModel): """LLM configuration model.""" @@ -481,10 +503,20 @@ async def complete( cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip() if cf_orch_url: try: + # Premium/ultra users get their personal fine-tuned writing model as the + # first candidate; the base model is the fallback so cf-orch can + # degrade gracefully if the personal model isn't loaded yet. + tier = get_request_tier() + writing_model = get_request_writing_model() + model_candidates: list[str] = ( + [writing_model, "Qwen2.5-3B-Instruct"] + if writing_model and tier in _PREMIUM_TIERS + else ["Qwen2.5-3B-Instruct"] + ) async with _allocate_orch_async( cf_orch_url, "vllm", - model_candidates=["Qwen2.5-3B-Instruct"], + model_candidates=model_candidates, ttl_s=300.0, caller="peregrine-resume-matcher", ) as alloc: From 148aaf00cb35320bbc226c2a463708ddf83b9255 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 2 May 2026 21:21:02 -0700 Subject: [PATCH 2/9] feat: add training export DB migration and db.py helpers Add excluded_from_training column to jobs table (migration 009 + _MIGRATIONS entry for existing DBs). Add get_db_pairs(), get_training_pairs(), and set_training_exclusion() helpers for the cover letter training export pipeline. Add test_training_export.py with 8 tests covering all helpers (all passing). --- migrations/009_training_export.sql | 1 + scripts/db.py | 99 ++++++++++++++++++++++- tests/test_training_export.py | 122 +++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 migrations/009_training_export.sql create mode 100644 tests/test_training_export.py diff --git a/migrations/009_training_export.sql b/migrations/009_training_export.sql new file mode 100644 index 0000000..87ce531 --- /dev/null +++ b/migrations/009_training_export.sql @@ -0,0 +1 @@ +ALTER TABLE jobs ADD COLUMN excluded_from_training INTEGER DEFAULT 0; diff --git a/scripts/db.py b/scripts/db.py index 6ea82a8..6daf69e 100644 --- a/scripts/db.py +++ b/scripts/db.py @@ -170,7 +170,8 @@ _MIGRATIONS = [ ("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier) ("ats_gap_report", "TEXT"), # JSON gap report (free tier) ("date_posted", "TEXT"), # Original posting date from job board (shadow listing detection) - ("hired_feedback", "TEXT"), # JSON: optional post-hire "what helped" response + ("hired_feedback", "TEXT"), # JSON: optional post-hire "what helped" response + ("excluded_from_training", "INTEGER DEFAULT 0"), # opt-out of training export ] @@ -1142,3 +1143,99 @@ def set_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0, resume_id: int = conn.commit() finally: conn.close() + +# ── Training export helpers ─────────────────────────────────────────────────── + +def _strip_greeting(text: str) -> str: + """Remove 'Dear X,' greeting line from cover letter text.""" + lines = text.splitlines() + for i, line in enumerate(lines): + stripped_line = line.strip() + if stripped_line.lower().startswith("dear ") and stripped_line.endswith((",", ":")): + rest = lines[i + 1:] + while rest and not rest[0].strip(): + rest = rest[1:] + result = "\n".join(rest).strip() + return result if result else text.strip() + return text.strip() + + +def get_db_pairs(db_path: Path) -> list[dict]: + """Return curation metadata for ALL qualifying jobs (included and excluded). + + Used by the curation UI. Includes excluded=True rows so users can restore them. + """ + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + try: + rows = conn.execute( + "SELECT id, title, company, description, status, " + " excluded_from_training " + "FROM jobs " + "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') " + " AND cover_letter IS NOT NULL AND cover_letter != '' " + "ORDER BY applied_at DESC", + ).fetchall() + finally: + conn.close() + return [ + { + "job_id": row["id"], + "title": row["title"] or "", + "company": row["company"] or "", + "status": row["status"], + "instruction": ( + f"Write a cover letter for the {row['title'] or 'unknown'} " + f"position at {row['company'] or 'unknown'}." + ), + "input_preview": (row["description"] or "")[:200], + "excluded": bool(row["excluded_from_training"]), + } + for row in rows + ] + + +def get_training_pairs(db_path: Path) -> list[dict]: + """Return Alpaca-format training pairs for non-excluded qualifying jobs. + + Used by the JSONL export endpoint. + """ + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + try: + rows = conn.execute( + "SELECT id, title, company, description, cover_letter " + "FROM jobs " + "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') " + " AND cover_letter IS NOT NULL AND cover_letter != '' " + " AND excluded_from_training = 0 " + "ORDER BY applied_at DESC", + ).fetchall() + finally: + conn.close() + return [ + { + "instruction": ( + f"Write a cover letter for the {row['title'] or 'unknown'} " + f"position at {row['company'] or 'unknown'}." + ), + "input": row["description"] or "", + "output": _strip_greeting(row["cover_letter"]), + "source": "db", + "job_id": row["id"], + } + for row in rows + ] + + +def set_training_exclusion(db_path: Path, job_id: int, excluded: bool) -> None: + """Set excluded_from_training flag on a job.""" + conn = sqlite3.connect(db_path) + try: + conn.execute( + "UPDATE jobs SET excluded_from_training = ? WHERE id = ?", + (1 if excluded else 0, job_id), + ) + conn.commit() + finally: + conn.close() diff --git a/tests/test_training_export.py b/tests/test_training_export.py new file mode 100644 index 0000000..e32b06e --- /dev/null +++ b/tests/test_training_export.py @@ -0,0 +1,122 @@ +"""Tests for cover letter training export helpers.""" +import json +import sqlite3 +import pytest +from pathlib import Path + + +def _make_db(tmp_path: Path) -> Path: + from scripts.db import init_db + db = tmp_path / "test.db" + init_db(db) + # excluded_from_training column is added by _migrate_db via _MIGRATIONS — no manual ALTER needed + return db + + +def _insert_job(db: Path, *, title="Engineer", company="Acme", status="applied", + cover_letter="Dear Hiring Manager,\n\nI am excited.", description="Build stuff.", + excluded=0) -> int: + conn = sqlite3.connect(db) + cur = conn.execute( + "INSERT INTO jobs (title, company, url, source, location, is_remote, salary, " + "description, date_found, status, cover_letter, excluded_from_training) " + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?)", + (title, company, f"https://example.com/{title}", "test", "Remote", 1, "", + description, "2026-01-01", status, cover_letter, excluded), + ) + conn.commit() + job_id = cur.lastrowid + conn.close() + return job_id + + +def test_get_training_pairs_returns_applied_jobs(tmp_path): + from scripts.db import get_training_pairs + db = _make_db(tmp_path) + _insert_job(db, title="Engineer", company="Acme", status="applied") + pairs = get_training_pairs(db) + assert len(pairs) == 1 + assert pairs[0]["source"] == "db" + assert pairs[0]["instruction"] == "Write a cover letter for the Engineer position at Acme." + assert "job_id" in pairs[0] + + +def test_get_training_pairs_strips_greeting(tmp_path): + from scripts.db import get_training_pairs + db = _make_db(tmp_path) + _insert_job(db, cover_letter="Dear Hiring Manager,\n\nI am excited to apply.\n\nSincerely, Me") + pairs = get_training_pairs(db) + assert not pairs[0]["output"].startswith("Dear") + assert "I am excited" in pairs[0]["output"] + + +def test_get_training_pairs_excludes_non_applied(tmp_path): + from scripts.db import get_training_pairs + db = _make_db(tmp_path) + _insert_job(db, title="PendingJob", status="pending") + _insert_job(db, title="ApprovedJob", status="approved") + pairs = get_training_pairs(db) + assert len(pairs) == 0 + + +def test_get_training_pairs_excludes_opted_out(tmp_path): + from scripts.db import get_training_pairs + db = _make_db(tmp_path) + _insert_job(db, excluded=1) + pairs = get_training_pairs(db) + assert len(pairs) == 0 + + +def test_get_training_pairs_null_description_gives_empty_input(tmp_path): + from scripts.db import get_training_pairs + db = _make_db(tmp_path) + conn = sqlite3.connect(db) + conn.execute( + "INSERT INTO jobs (title, company, url, source, location, is_remote, salary, " + "date_found, status, cover_letter, excluded_from_training) " + "VALUES (?,?,?,?,?,?,?,?,?,?,?)", + ("Dev", "Corp", "https://x.com/1", "test", "Remote", 1, "", + "2026-01-01", "applied", "Great letter body", 0), + ) + conn.commit() + conn.close() + pairs = get_training_pairs(db) + assert pairs[0]["input"] == "" + + +def test_get_db_pairs_includes_excluded_with_flag(tmp_path): + from scripts.db import get_db_pairs + db = _make_db(tmp_path) + _insert_job(db, excluded=0) + _insert_job(db, title="Other", excluded=1) + pairs = get_db_pairs(db) + assert len(pairs) == 2 + excluded = [p for p in pairs if p["excluded"]] + included = [p for p in pairs if not p["excluded"]] + assert len(excluded) == 1 + assert len(included) == 1 + + +def test_set_training_exclusion_excludes(tmp_path): + from scripts.db import get_training_pairs, set_training_exclusion + db = _make_db(tmp_path) + job_id = _insert_job(db) + assert len(get_training_pairs(db)) == 1 + set_training_exclusion(db, job_id, excluded=True) + assert len(get_training_pairs(db)) == 0 + + +def test_set_training_exclusion_restores(tmp_path): + from scripts.db import get_training_pairs, set_training_exclusion + db = _make_db(tmp_path) + job_id = _insert_job(db, excluded=1) + assert len(get_training_pairs(db)) == 0 + set_training_exclusion(db, job_id, excluded=False) + assert len(get_training_pairs(db)) == 1 + + +def test_strip_greeting_returns_original_when_no_body(tmp_path): + from scripts.db import _strip_greeting + # A letter that is only a salutation with no body should return the original text + result = _strip_greeting("Dear Hiring Manager,") + assert result == "Dear Hiring Manager," From 3b528443825e58b4d613009569803c7909a1e239 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 2 May 2026 23:32:34 -0700 Subject: [PATCH 3/9] feat: add training_export_opt_in field to UserProfile --- scripts/user_profile.py | 3 +++ tests/test_training_export.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/scripts/user_profile.py b/scripts/user_profile.py index eae7982..63f4ba4 100644 --- a/scripts/user_profile.py +++ b/scripts/user_profile.py @@ -29,6 +29,7 @@ _DEFAULTS = { "tier": "free", "dev_tier_override": None, "wizard_complete": False, + "training_export_opt_in": False, "wizard_step": 0, "dismissed_banners": [], "ui_preference": "streamlit", @@ -77,6 +78,7 @@ class UserProfile: self.tier: str = data.get("tier", "free") self.dev_tier_override: str | None = data.get("dev_tier_override") or None self.wizard_complete: bool = bool(data.get("wizard_complete", False)) + self.training_export_opt_in: bool = bool(data.get("training_export_opt_in", False)) self.wizard_step: int = int(data.get("wizard_step", 0)) self.dismissed_banners: list[str] = list(data.get("dismissed_banners", [])) raw_pref = data.get("ui_preference", "streamlit") @@ -104,6 +106,7 @@ class UserProfile: "tier": self.tier, "dev_tier_override": self.dev_tier_override, "wizard_complete": self.wizard_complete, + "training_export_opt_in": self.training_export_opt_in, "wizard_step": self.wizard_step, "dismissed_banners": self.dismissed_banners, "ui_preference": self.ui_preference, diff --git a/tests/test_training_export.py b/tests/test_training_export.py index e32b06e..23c8b8c 100644 --- a/tests/test_training_export.py +++ b/tests/test_training_export.py @@ -120,3 +120,22 @@ def test_strip_greeting_returns_original_when_no_body(tmp_path): # A letter that is only a salutation with no body should return the original text result = _strip_greeting("Dear Hiring Manager,") assert result == "Dear Hiring Manager," + + +def test_user_profile_training_opt_in_defaults_false(tmp_path): + from scripts.user_profile import UserProfile + yaml_path = tmp_path / "user.yaml" + yaml_path.write_text("name: Test\nemail: test@example.com\n") + profile = UserProfile(yaml_path) + assert profile.training_export_opt_in is False + + +def test_user_profile_training_opt_in_roundtrip(tmp_path): + from scripts.user_profile import UserProfile + yaml_path = tmp_path / "user.yaml" + yaml_path.write_text("name: Test\nemail: test@example.com\n") + profile = UserProfile(yaml_path) + profile.training_export_opt_in = True + profile.save() + reloaded = UserProfile(yaml_path) + assert reloaded.training_export_opt_in is True From 25473aef770dede61756638ea8d353d86a5b017f Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 2 May 2026 23:40:44 -0700 Subject: [PATCH 4/9] feat: add training export API endpoints to dev_api.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - PATCH /api/settings/fine-tune/opt-in — toggle training_export_opt_in in user.yaml - GET /api/settings/fine-tune/db-pairs — list DB jobs with exclusion flags (403 without opt-in) - PATCH /api/settings/fine-tune/db-pairs/{id}/exclude|include — per-job exclusion toggle - GET /api/settings/fine-tune/export — NDJSON streaming download of all training pairs (DB + file) - POST/GET /api/settings/fine-tune/cloud-request|cloud-status — Phase 2 stubs (501) - finetune_status now includes opted_in field - 6 new API tests; all 17 tests pass --- dev-api.py | 118 +++++++++++++++++++++++++++++++++- tests/test_training_export.py | 83 ++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 1 deletion(-) diff --git a/dev-api.py b/dev-api.py index a27d9f2..d09b233 100644 --- a/dev-api.py +++ b/dev-api.py @@ -3581,8 +3581,13 @@ def finetune_status(): db_count = task.get("result_count", 0) or 0 pairs_count = max(pairs_count, db_count) status = task.get("status", "idle") if task else "idle" + try: + from scripts.user_profile import UserProfile + _opted_in = UserProfile(Path(_user_yaml_path())).training_export_opt_in + except Exception: + _opted_in = False # Stub quota for self-hosted; cloud overrides via its own middleware - return {"status": status, "pairs_count": pairs_count, "quota_remaining": None} + return {"status": status, "pairs_count": pairs_count, "quota_remaining": None, "opted_in": _opted_in} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -3663,6 +3668,117 @@ def finetune_local_status(): return {"model_ready": False} +# ── Settings: Fine-Tune — Training Export ───────────────────────────────────── + +class TrainingOptInBody(BaseModel): + enabled: bool + + +def _training_opt_in_required() -> None: + """Raise 403 if training_export_opt_in is not enabled in user profile.""" + try: + from scripts.user_profile import UserProfile + profile = UserProfile(Path(_user_yaml_path())) + if not profile.training_export_opt_in: + raise HTTPException( + status_code=403, + detail="Training export is not enabled. Enable it in Settings → Fine-Tune.", + ) + except FileNotFoundError: + raise HTTPException( + status_code=403, + detail="Training export is not enabled. Enable it in Settings → Fine-Tune.", + ) + + +@app.patch("/api/settings/fine-tune/opt-in") +def set_training_opt_in(body: TrainingOptInBody): + try: + from scripts.user_profile import UserProfile + profile = UserProfile(Path(_user_yaml_path())) + profile.training_export_opt_in = body.enabled + profile.save() + return {"ok": True, "enabled": profile.training_export_opt_in} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/settings/fine-tune/db-pairs") +def list_db_pairs(): + _training_opt_in_required() + try: + from scripts.db import get_db_pairs + db_path = Path(_request_db.get() or DB_PATH) + pairs = get_db_pairs(db_path) + excluded_count = sum(1 for p in pairs if p["excluded"]) + return { + "pairs": pairs, + "total": len(pairs), + "excluded_count": excluded_count, + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/exclude") +def exclude_db_pair(job_id: int): + _training_opt_in_required() + try: + from scripts.db import set_training_exclusion + set_training_exclusion(Path(_request_db.get() or DB_PATH), job_id, excluded=True) + return {"ok": True, "job_id": job_id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/include") +def include_db_pair(job_id: int): + _training_opt_in_required() + try: + from scripts.db import set_training_exclusion + set_training_exclusion(Path(_request_db.get() or DB_PATH), job_id, excluded=False) + return {"ok": True, "job_id": job_id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/settings/fine-tune/export") +def export_training_jsonl(): + _training_opt_in_required() + import json as _json + from fastapi.responses import StreamingResponse + from scripts.db import get_training_pairs + + db_path = Path(_request_db.get() or DB_PATH) + db_pairs = get_training_pairs(db_path) + file_pairs = _load_training_pairs() + + def _generate(): + for pair in db_pairs: + yield _json.dumps(pair, ensure_ascii=False) + "\n" + for pair in file_pairs: + record = dict(pair) + record.setdefault("source", "file") + yield _json.dumps(record, ensure_ascii=False) + "\n" + + return StreamingResponse( + _generate(), + media_type="application/x-ndjson", + headers={"Content-Disposition": 'attachment; filename="peregrine_training_pairs.jsonl"'}, + ) + + +# Phase 2 stubs — reserved, not yet implemented +@app.post("/api/settings/fine-tune/cloud-request") +def cloud_finetune_request(): + raise HTTPException(status_code=501, detail="Cloud fine-tune is not yet available.") + + +@app.get("/api/settings/fine-tune/cloud-status") +def cloud_finetune_status(): + raise HTTPException(status_code=501, detail="Cloud fine-tune is not yet available.") + + # ── Settings: License ───────────────────────────────────────────────────────── # _config_dir() / _license_path() / _tokens_path() are per-request (see helpers above) diff --git a/tests/test_training_export.py b/tests/test_training_export.py index 23c8b8c..e5dbca9 100644 --- a/tests/test_training_export.py +++ b/tests/test_training_export.py @@ -139,3 +139,86 @@ def test_user_profile_training_opt_in_roundtrip(tmp_path): profile.save() reloaded = UserProfile(yaml_path) assert reloaded.training_export_opt_in is True + + +# ── API tests ───────────────────────────────────────────────────────────────── + +@pytest.fixture() +def api_client(tmp_path, monkeypatch): + """TestClient with a fresh DB and user.yaml for training export endpoints.""" + import yaml + from fastapi.testclient import TestClient + + db = _make_db(tmp_path) + yaml_path = tmp_path / "config" / "user.yaml" + yaml_path.parent.mkdir(parents=True) + yaml_path.write_text(yaml.dump({"name": "Test", "email": "t@t.com"})) + + monkeypatch.setenv("STAGING_DB", str(db)) + monkeypatch.setattr("dev_api.DB_PATH", str(db)) + monkeypatch.setattr("dev_api._user_yaml_path", lambda: str(yaml_path)) + + from dev_api import app + return TestClient(app), db, yaml_path + + +def test_opt_in_toggle(api_client): + client, db, yaml_path = api_client + resp = client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + assert resp.status_code == 200 + assert resp.json()["enabled"] is True + import yaml as _yaml + data = _yaml.safe_load(yaml_path.read_text()) + assert data["training_export_opt_in"] is True + + +def test_db_pairs_blocked_without_opt_in(api_client): + client, db, yaml_path = api_client + resp = client.get("/api/settings/fine-tune/db-pairs") + assert resp.status_code == 403 + + +def test_db_pairs_returns_jobs_when_opted_in(api_client): + client, db, yaml_path = api_client + _insert_job(db, title="Engineer", company="Acme") + client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + resp = client.get("/api/settings/fine-tune/db-pairs") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] >= 1 + assert data["pairs"][0]["title"] == "Engineer" + + +def test_exclude_and_restore(api_client): + client, db, yaml_path = api_client + job_id = _insert_job(db) + client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + resp = client.patch(f"/api/settings/fine-tune/db-pairs/{job_id}/exclude") + assert resp.status_code == 200 + pairs = client.get("/api/settings/fine-tune/db-pairs").json()["pairs"] + assert any(p["job_id"] == job_id and p["excluded"] for p in pairs) + client.patch(f"/api/settings/fine-tune/db-pairs/{job_id}/include") + pairs = client.get("/api/settings/fine-tune/db-pairs").json()["pairs"] + assert any(p["job_id"] == job_id and not p["excluded"] for p in pairs) + + +def test_export_jsonl_blocked_without_opt_in(api_client): + client, db, yaml_path = api_client + resp = client.get("/api/settings/fine-tune/export") + assert resp.status_code == 403 + + +def test_export_jsonl_streams_valid_records(api_client): + client, db, yaml_path = api_client + _insert_job(db, cover_letter="Dear Sir,\n\nGreat role body.", description="Build things.") + client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + resp = client.get("/api/settings/fine-tune/export") + assert resp.status_code == 200 + assert "attachment" in resp.headers.get("content-disposition", "") + lines = [l for l in resp.text.strip().splitlines() if l] + assert len(lines) >= 1 + record = json.loads(lines[0]) + assert "instruction" in record + assert "input" in record + assert "output" in record + assert record["source"] == "db" From 8e6cc0229563569e2ec8a5f1d64e8ba83ea24329 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 3 May 2026 00:14:22 -0700 Subject: [PATCH 5/9] feat: add training export state and actions to fineTune store --- web/src/stores/settings/fineTune.test.ts | 46 ++++++++++++++ web/src/stores/settings/fineTune.ts | 79 ++++++++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/web/src/stores/settings/fineTune.test.ts b/web/src/stores/settings/fineTune.test.ts index 838a7e5..871e105 100644 --- a/web/src/stores/settings/fineTune.test.ts +++ b/web/src/stores/settings/fineTune.test.ts @@ -1,8 +1,11 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' import { setActivePinia, createPinia } from 'pinia' import { useFineTuneStore } from './fineTune' +import type { DbPair } from './fineTune' vi.mock('../../composables/useApi', () => ({ useApiFetch: vi.fn() })) +vi.mock('../appConfig', () => ({ useAppConfigStore: vi.fn(() => ({ isDemo: false })) })) +vi.mock('../../composables/useToast', () => ({ showToast: vi.fn() })) import { useApiFetch } from '../../composables/useApi' const mockFetch = vi.mocked(useApiFetch) @@ -36,4 +39,47 @@ describe('useFineTuneStore', () => { expect(mockFetch).toHaveBeenCalledWith('/api/settings/fine-tune/status') store.stopPolling() }) + + it('toggleOptIn updates optedIn state', async () => { + mockFetch.mockResolvedValue({ data: { ok: true, enabled: true }, error: null }) + const store = useFineTuneStore() + await store.toggleOptIn(true) + expect(store.optedIn).toBe(true) + }) + + it('loadDbPairs no-ops when not opted in', async () => { + const store = useFineTuneStore() + store.optedIn = false + await store.loadDbPairs() + expect(store.dbPairs).toEqual([]) + expect(mockFetch).not.toHaveBeenCalledWith('/api/settings/fine-tune/db-pairs') + }) + + it('loadDbPairs fetches when opted in', async () => { + const pairs: DbPair[] = [{ job_id: 1, title: 'Eng', company: 'Acme', status: 'applied', instruction: 'Write...', input_preview: 'Build', excluded: false }] + mockFetch.mockResolvedValue({ data: { pairs, total: 1, excluded_count: 0 }, error: null }) + const store = useFineTuneStore() + store.optedIn = true + await store.loadDbPairs() + expect(store.dbPairs).toHaveLength(1) + }) + + it('excludeDbPair marks pair excluded and increments count', async () => { + mockFetch.mockResolvedValue({ data: { ok: true }, error: null }) + const store = useFineTuneStore() + store.dbPairs = [{ job_id: 1, title: 'Eng', company: 'Acme', status: 'applied', instruction: 'Write...', input_preview: 'Build', excluded: false }] + await store.excludeDbPair(1) + expect(store.dbPairs[0].excluded).toBe(true) + expect(store.dbExcludedCount).toBe(1) + }) + + it('includeDbPair marks pair included and decrements excludedCount', async () => { + mockFetch.mockResolvedValue({ data: { ok: true }, error: null }) + const store = useFineTuneStore() + store.dbPairs = [{ job_id: 1, title: 'Eng', company: 'Acme', status: 'applied', instruction: 'Write...', input_preview: 'Build', excluded: true }] + store.dbExcludedCount = 1 + await store.includeDbPair(1) + expect(store.dbPairs[0].excluded).toBe(false) + expect(store.dbExcludedCount).toBe(0) + }) }) diff --git a/web/src/stores/settings/fineTune.ts b/web/src/stores/settings/fineTune.ts index c8df9d9..1120b6a 100644 --- a/web/src/stores/settings/fineTune.ts +++ b/web/src/stores/settings/fineTune.ts @@ -10,6 +10,16 @@ export interface TrainingPair { source_file: string } +export interface DbPair { + job_id: number + title: string + company: string + status: string + instruction: string + input_preview: string + excluded: boolean +} + export const useFineTuneStore = defineStore('settings/fineTune', () => { const step = ref(1) const inFlightJob = ref(false) @@ -22,6 +32,11 @@ export const useFineTuneStore = defineStore('settings/fineTune', () => { const pairsLoading = ref(false) let _pollTimer: ReturnType | null = null + const optedIn = ref(false) + const dbPairs = ref([]) + const dbPairsLoading = ref(false) + const dbExcludedCount = ref(0) + function resetStep() { step.value = 1 } async function loadStatus() { @@ -31,6 +46,7 @@ export const useFineTuneStore = defineStore('settings/fineTune', () => { pairsCount.value = data.pairs_count ?? 0 quotaRemaining.value = data.quota_remaining ?? null inFlightJob.value = ['queued', 'running'].includes(data.status) + optedIn.value = (data as any).opted_in ?? false } function startPolling() { @@ -68,6 +84,60 @@ export const useFineTuneStore = defineStore('settings/fineTune', () => { } } + async function toggleOptIn(enabled: boolean) { + const { data } = await useApiFetch<{ ok: boolean; enabled: boolean }>( + '/api/settings/fine-tune/opt-in', + { method: 'PATCH', body: JSON.stringify({ enabled }), headers: { 'Content-Type': 'application/json' } }, + ) + if (data) optedIn.value = data.enabled + } + + async function loadDbPairs() { + if (!optedIn.value) { dbPairs.value = []; return } + dbPairsLoading.value = true + const { data } = await useApiFetch<{ pairs: DbPair[]; total: number; excluded_count: number }>( + '/api/settings/fine-tune/db-pairs', + ) + dbPairsLoading.value = false + if (data) { + dbPairs.value = data.pairs + dbExcludedCount.value = data.excluded_count + } + } + + async function excludeDbPair(jobId: number) { + const { data } = await useApiFetch<{ ok: boolean }>( + `/api/settings/fine-tune/db-pairs/${jobId}/exclude`, + { method: 'PATCH' }, + ) + if (data?.ok) { + dbPairs.value = dbPairs.value.map(p => + p.job_id === jobId ? { ...p, excluded: true } : p, + ) + dbExcludedCount.value += 1 + } + } + + async function includeDbPair(jobId: number) { + const { data } = await useApiFetch<{ ok: boolean }>( + `/api/settings/fine-tune/db-pairs/${jobId}/include`, + { method: 'PATCH' }, + ) + if (data?.ok) { + dbPairs.value = dbPairs.value.map(p => + p.job_id === jobId ? { ...p, excluded: false } : p, + ) + dbExcludedCount.value = Math.max(0, dbExcludedCount.value - 1) + } + } + + function downloadExport() { + const a = document.createElement('a') + a.href = '/api/settings/fine-tune/export' + a.download = 'peregrine_training_pairs.jsonl' + a.click() + } + return { step, inFlightJob, @@ -85,5 +155,14 @@ export const useFineTuneStore = defineStore('settings/fineTune', () => { submitJob, loadPairs, deletePair, + optedIn, + dbPairs, + dbPairsLoading, + dbExcludedCount, + toggleOptIn, + loadDbPairs, + excludeDbPair, + includeDbPair, + downloadExport, } }) From 6bfb2bf3f7f65f48ba48494999c2612e0e1d652f Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 3 May 2026 01:04:43 -0700 Subject: [PATCH 6/9] feat: add Training Export and From Applied Jobs sections to FineTuneView (a11y-correct) --- web/src/views/settings/FineTuneView.vue | 144 +++++++++++++++++++++++- 1 file changed, 143 insertions(+), 1 deletion(-) diff --git a/web/src/views/settings/FineTuneView.vue b/web/src/views/settings/FineTuneView.vue index e4f0b0e..0086f1e 100644 --- a/web/src/views/settings/FineTuneView.vue +++ b/web/src/views/settings/FineTuneView.vue @@ -3,16 +3,31 @@ import { ref, onMounted, onUnmounted } from 'vue' import { storeToRefs } from 'pinia' import { useFineTuneStore } from '../../stores/settings/fineTune' import { useAppConfigStore } from '../../stores/appConfig' +import { showToast } from '../../composables/useToast' const store = useFineTuneStore() const config = useAppConfigStore() -const { step, inFlightJob, jobStatus, pairsCount, quotaRemaining, pairs, pairsLoading } = storeToRefs(store) +const { step, inFlightJob, jobStatus, pairsCount, quotaRemaining, pairs, pairsLoading, + optedIn, dbPairs, dbPairsLoading, dbExcludedCount } = storeToRefs(store) const fileInput = ref(null) const selectedFiles = ref([]) const uploadResult = ref<{ file_count: number } | null>(null) const extractError = ref(null) const modelReady = ref(null) +const toggling = ref(false) +const toggleSaved = ref(false) + +async function handleOptInChange(e: Event) { + const enabled = (e.target as HTMLInputElement).checked + toggling.value = true + toggleSaved.value = false + await store.toggleOptIn(enabled) + await store.loadDbPairs() + toggling.value = false + toggleSaved.value = true + setTimeout(() => { toggleSaved.value = false }, 2000) +} async function handleUpload() { if (!selectedFiles.value.length) return @@ -46,6 +61,7 @@ async function checkLocalModel() { onMounted(async () => { store.startPolling() await store.loadPairs() + await store.loadDbPairs() if (store.step === 3 && !config.isCloud) await checkLocalModel() }) onUnmounted(() => { store.stopPolling(); store.resetStep() }) @@ -55,6 +71,115 @@ onUnmounted(() => { store.stopPolling(); store.resetStep() })

Fine-Tune Model

+ + + + +
+

From Applied Jobs

+
+ + {{ + dbPairs.filter(p => !p.excluded).length === 1 + ? '1 pair available' + : `${dbPairs.filter(p => !p.excluded).length} pairs available` + }} + {{ dbExcludedCount }} excluded + +
+ +
+ +

+ Available on Premium. + Upgrade your plan → +

+
+
+
+

+ The downloaded file contains your cover letters in plain text (JSONL format). + Store it in a secure location. +

+ +
+
Loading…
+
    +
  • +
    + {{ pair.title }} · {{ pair.company }} + {{ pair.status }} +
    + + +
  • +
+

No applied jobs with cover letters found.

+
+
+
1. Upload @@ -189,4 +314,21 @@ onUnmounted(() => { store.stopPolling(); store.resetStep() }) .pair-source { font-size: 0.75rem; color: var(--color-text-muted); } .pair-delete { flex-shrink: 0; background: none; border: none; color: var(--color-error); cursor: pointer; font-size: 0.9rem; padding: 2px 4px; border-radius: var(--radius-sm); transition: background 150ms; } .pair-delete:hover { background: var(--color-error); color: #fff; } +.training-export-consent { border: 1px solid var(--color-border-light); border-radius: var(--radius-md); padding: var(--space-4, 1rem); margin-bottom: var(--space-6, 1.5rem); } +.toggle-label { display: flex; align-items: center; gap: var(--space-2, 0.5rem); font-size: 0.9rem; font-weight: 500; cursor: pointer; flex-wrap: wrap; } +.toggle-label.toggle-saving { opacity: 0.7; } +.toggle-label input[type="checkbox"] { width: 16px; height: 16px; accent-color: var(--color-primary); cursor: pointer; flex-shrink: 0; } +.toggle-status { font-size: 0.8rem; color: var(--color-text-muted); margin-left: var(--space-1, 0.25rem); } +.opt-out-receipt { display: block; margin-top: var(--space-1, 0.25rem); color: var(--color-text-muted); font-size: 0.8rem; } +.db-pairs-header { display: flex; align-items: flex-start; justify-content: space-between; flex-wrap: wrap; gap: var(--space-3, 0.75rem); margin-bottom: var(--space-4, 1rem); } +.db-pairs-actions { display: flex; align-items: flex-start; gap: var(--space-2, 0.5rem); flex-wrap: wrap; } +.cloud-finetune-wrap { display: flex; flex-direction: column; gap: var(--space-1, 0.25rem); } +.tier-gate-note { font-size: 0.8rem; color: var(--color-text-muted); margin: 0; } +.upgrade-link { color: var(--color-primary); text-decoration: underline; } +.excluded-badge { margin-left: var(--space-2, 0.5rem); background: var(--color-warning-bg, #fef3c7); color: var(--color-warning-fg, #92400e); font-size: 0.75rem; padding: 1px 6px; border-radius: var(--radius-full, 9999px); } +.db-pairs-items { max-height: 320px; } +.pair-excluded { opacity: 0.5; } +.pair-restore { flex-shrink: 0; background: none; border: 1px solid var(--color-border); color: var(--color-text-muted); cursor: pointer; font-size: 0.8rem; padding: 2px 8px; border-radius: var(--radius-sm); } +.pair-restore:hover { background: var(--color-surface-alt); } +.download-advisory { margin-top: var(--space-2, 0.5rem); font-style: italic; } From 0e40750450df6de57753119a33656c88bb83fa54 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 3 May 2026 01:11:06 -0700 Subject: [PATCH 7/9] feat: add WizardTrainingStep opt-in consent step to onboarding Inserts a new optional Training Export step between Resume and Identity in the setup wizard. Users can opt in to saving cover letters for fine-tuning dataset export. Consent copy distinguishes local vs. cloud storage. WIZARD_STEPS bumped to 7; router, and adjacent step back/next navigation updated accordingly. --- web/src/router/index.ts | 1 + web/src/stores/wizard.ts | 2 +- web/src/views/wizard/WizardIdentityStep.vue | 2 +- web/src/views/wizard/WizardResumeStep.vue | 2 +- web/src/views/wizard/WizardTrainingStep.vue | 74 +++++++++++++++++++++ 5 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 web/src/views/wizard/WizardTrainingStep.vue diff --git a/web/src/router/index.ts b/web/src/router/index.ts index bb98b35..dab3efc 100644 --- a/web/src/router/index.ts +++ b/web/src/router/index.ts @@ -46,6 +46,7 @@ export const router = createRouter({ { path: 'hardware', component: () => import('../views/wizard/WizardHardwareStep.vue') }, { path: 'tier', component: () => import('../views/wizard/WizardTierStep.vue') }, { path: 'resume', component: () => import('../views/wizard/WizardResumeStep.vue') }, + { path: 'training', component: () => import('../views/wizard/WizardTrainingStep.vue') }, { path: 'identity', component: () => import('../views/wizard/WizardIdentityStep.vue') }, { path: 'inference', component: () => import('../views/wizard/WizardInferenceStep.vue') }, { path: 'search', component: () => import('../views/wizard/WizardSearchStep.vue') }, diff --git a/web/src/stores/wizard.ts b/web/src/stores/wizard.ts index e2dba47..1824a60 100644 --- a/web/src/stores/wizard.ts +++ b/web/src/stores/wizard.ts @@ -44,7 +44,7 @@ export interface WizardInferenceData { } // Total mandatory steps (integrations step 7 is optional/skip-able) -export const WIZARD_STEPS = 6 +export const WIZARD_STEPS = 7 export const STEP_LABELS = ['Hardware', 'Tier', 'Resume', 'Identity', 'Inference', 'Search', 'Integrations'] export const STEP_ROUTES = [ '/setup/hardware', diff --git a/web/src/views/wizard/WizardIdentityStep.vue b/web/src/views/wizard/WizardIdentityStep.vue index 3a46237..907f7e4 100644 --- a/web/src/views/wizard/WizardIdentityStep.vue +++ b/web/src/views/wizard/WizardIdentityStep.vue @@ -74,7 +74,7 @@ const form = reactive({ careerSummary: wizard.identity.careerSummary, }) -function back() { router.push('/setup/resume') } +function back() { router.push('/setup/training') } async function next() { validationError.value = '' diff --git a/web/src/views/wizard/WizardResumeStep.vue b/web/src/views/wizard/WizardResumeStep.vue index 1fab58f..e86aa2a 100644 --- a/web/src/views/wizard/WizardResumeStep.vue +++ b/web/src/views/wizard/WizardResumeStep.vue @@ -216,7 +216,7 @@ async function next() { experience: wizard.resume.experience, ...(wizard.resume.parsedData ?? {}), }}) - if (ok) router.push('/setup/identity') + if (ok) router.push('/setup/training') } diff --git a/web/src/views/wizard/WizardTrainingStep.vue b/web/src/views/wizard/WizardTrainingStep.vue new file mode 100644 index 0000000..994e5b8 --- /dev/null +++ b/web/src/views/wizard/WizardTrainingStep.vue @@ -0,0 +1,74 @@ + + + + + From f42a515629027945abd4185d756b4498ad03b337 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 3 May 2026 01:34:06 -0700 Subject: [PATCH 8/9] fix: wizard step numbering and loadDbPairs race on mount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Insert Training at step 4 in WIZARD_STEPS (7→8), STEP_LABELS, and STEP_ROUTES. Bump Identity→5, Inference→6, Search→7, Integrations→8 in their respective saveStep calls. Cap resumeAt at 8. Await loadStatus() before loadDbPairs() in FineTuneView onMounted so optedIn is set before the early-exit guard runs. --- web/src/stores/wizard.ts | 9 +++++---- web/src/views/settings/FineTuneView.vue | 1 + web/src/views/wizard/WizardIdentityStep.vue | 2 +- web/src/views/wizard/WizardInferenceStep.vue | 2 +- web/src/views/wizard/WizardIntegrationsStep.vue | 2 +- web/src/views/wizard/WizardSearchStep.vue | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/web/src/stores/wizard.ts b/web/src/stores/wizard.ts index 1824a60..86ec7f8 100644 --- a/web/src/stores/wizard.ts +++ b/web/src/stores/wizard.ts @@ -43,13 +43,14 @@ export interface WizardInferenceData { testMessage: string } -// Total mandatory steps (integrations step 7 is optional/skip-able) -export const WIZARD_STEPS = 7 -export const STEP_LABELS = ['Hardware', 'Tier', 'Resume', 'Identity', 'Inference', 'Search', 'Integrations'] +// Total mandatory steps (integrations step 8 is optional/skip-able) +export const WIZARD_STEPS = 8 +export const STEP_LABELS = ['Hardware', 'Tier', 'Resume', 'Training', 'Identity', 'Inference', 'Search', 'Integrations'] export const STEP_ROUTES = [ '/setup/hardware', '/setup/tier', '/setup/resume', + '/setup/training', '/setup/identity', '/setup/inference', '/setup/search', @@ -163,7 +164,7 @@ export const useWizardStore = defineStore('wizard', () => { } // Resume at next step after last completed - const resumeAt = Math.max(1, Math.min(data.wizard_step + 1, 7)) + const resumeAt = Math.max(1, Math.min(data.wizard_step + 1, 8)) currentStep.value = resumeAt return routeForStep(resumeAt) } finally { diff --git a/web/src/views/settings/FineTuneView.vue b/web/src/views/settings/FineTuneView.vue index 0086f1e..d77ee27 100644 --- a/web/src/views/settings/FineTuneView.vue +++ b/web/src/views/settings/FineTuneView.vue @@ -60,6 +60,7 @@ async function checkLocalModel() { onMounted(async () => { store.startPolling() + await store.loadStatus() await store.loadPairs() await store.loadDbPairs() if (store.step === 3 && !config.isCloud) await checkLocalModel() diff --git a/web/src/views/wizard/WizardIdentityStep.vue b/web/src/views/wizard/WizardIdentityStep.vue index 907f7e4..46ee6bc 100644 --- a/web/src/views/wizard/WizardIdentityStep.vue +++ b/web/src/views/wizard/WizardIdentityStep.vue @@ -92,7 +92,7 @@ async function next() { } wizard.identity = { ...form } - const ok = await wizard.saveStep(4, { + const ok = await wizard.saveStep(5, { name: form.name, email: form.email, phone: form.phone, diff --git a/web/src/views/wizard/WizardInferenceStep.vue b/web/src/views/wizard/WizardInferenceStep.vue index 893cc52..a59a711 100644 --- a/web/src/views/wizard/WizardInferenceStep.vue +++ b/web/src/views/wizard/WizardInferenceStep.vue @@ -127,7 +127,7 @@ async function next() { }) wizard.inference.services = svcMap - const ok = await wizard.saveStep(5, { + const ok = await wizard.saveStep(6, { anthropic_key: form.anthropicKey, openai_url: form.openaiUrl, openai_key: form.openaiKey, diff --git a/web/src/views/wizard/WizardIntegrationsStep.vue b/web/src/views/wizard/WizardIntegrationsStep.vue index fcae2f8..7e032ff 100644 --- a/web/src/views/wizard/WizardIntegrationsStep.vue +++ b/web/src/views/wizard/WizardIntegrationsStep.vue @@ -85,7 +85,7 @@ function back() { router.push('/setup/search') } async function finish() { // Save integration selections (step 7) then mark wizard complete - await wizard.saveStep(7, { integrations: [...checkedIds.value] }) + await wizard.saveStep(8, { integrations: [...checkedIds.value] }) const ok = await wizard.complete() if (ok) router.replace('/') } diff --git a/web/src/views/wizard/WizardSearchStep.vue b/web/src/views/wizard/WizardSearchStep.vue index 00327b7..f3689da 100644 --- a/web/src/views/wizard/WizardSearchStep.vue +++ b/web/src/views/wizard/WizardSearchStep.vue @@ -143,7 +143,7 @@ async function next() { wizard.search.titles = [...form.titles] wizard.search.locations = [...form.locations] - const ok = await wizard.saveStep(6, { + const ok = await wizard.saveStep(7, { search: { titles: form.titles, locations: form.locations, From 424a946ca0d0758f26afd1ac959c6fb723ee00bc Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 3 May 2026 01:44:46 -0700 Subject: [PATCH 9/9] chore(release): v0.9.2 changelog entry --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2240162..30e8d98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). --- +## [0.9.2] — 2026-05-02 + +### Added + +- **Cover letter training export** (#111) — opt-in consent gate (`training_export_opt_in` + in `user.yaml`, default off) lets users export applied-job cover letters as Alpaca-format + JSONL for local fine-tuning. Per-job exclude/restore curation in Settings → Fine-Tune. + Streaming JSONL download merges DB pairs with any previously uploaded file pairs. + Cloud fine-tune Phase 2 stub (501) reserved for cf-orch integration. +- **WizardTrainingStep** — new onboarding consent step inserted between Resume and Identity; + skippable, opt-in default off, cloud-aware privacy copy. +- **a11y:** confirmed-state toggle (no optimistic DOM divergence), visible Premium tier gate + with upgrade link, `aria-live` region on pairs list, cloud-aware consent copy. + +--- + ## [0.9.0] — 2026-04-20 ### Added