From 25473aef770dede61756638ea8d353d86a5b017f Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 2 May 2026 23:40:44 -0700 Subject: [PATCH] feat: add training export API endpoints to dev_api.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - PATCH /api/settings/fine-tune/opt-in — toggle training_export_opt_in in user.yaml - GET /api/settings/fine-tune/db-pairs — list DB jobs with exclusion flags (403 without opt-in) - PATCH /api/settings/fine-tune/db-pairs/{id}/exclude|include — per-job exclusion toggle - GET /api/settings/fine-tune/export — NDJSON streaming download of all training pairs (DB + file) - POST/GET /api/settings/fine-tune/cloud-request|cloud-status — Phase 2 stubs (501) - finetune_status now includes opted_in field - 6 new API tests; all 17 tests pass --- dev-api.py | 118 +++++++++++++++++++++++++++++++++- tests/test_training_export.py | 83 ++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 1 deletion(-) diff --git a/dev-api.py b/dev-api.py index a27d9f2..d09b233 100644 --- a/dev-api.py +++ b/dev-api.py @@ -3581,8 +3581,13 @@ def finetune_status(): db_count = task.get("result_count", 0) or 0 pairs_count = max(pairs_count, db_count) status = task.get("status", "idle") if task else "idle" + try: + from scripts.user_profile import UserProfile + _opted_in = UserProfile(Path(_user_yaml_path())).training_export_opt_in + except Exception: + _opted_in = False # Stub quota for self-hosted; cloud overrides via its own middleware - return {"status": status, "pairs_count": pairs_count, "quota_remaining": None} + return {"status": status, "pairs_count": pairs_count, "quota_remaining": None, "opted_in": _opted_in} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -3663,6 +3668,117 @@ def finetune_local_status(): return {"model_ready": False} +# ── Settings: Fine-Tune — Training Export ───────────────────────────────────── + +class TrainingOptInBody(BaseModel): + enabled: bool + + +def _training_opt_in_required() -> None: + """Raise 403 if training_export_opt_in is not enabled in user profile.""" + try: + from scripts.user_profile import UserProfile + profile = UserProfile(Path(_user_yaml_path())) + if not profile.training_export_opt_in: + raise HTTPException( + status_code=403, + detail="Training export is not enabled. Enable it in Settings → Fine-Tune.", + ) + except FileNotFoundError: + raise HTTPException( + status_code=403, + detail="Training export is not enabled. Enable it in Settings → Fine-Tune.", + ) + + +@app.patch("/api/settings/fine-tune/opt-in") +def set_training_opt_in(body: TrainingOptInBody): + try: + from scripts.user_profile import UserProfile + profile = UserProfile(Path(_user_yaml_path())) + profile.training_export_opt_in = body.enabled + profile.save() + return {"ok": True, "enabled": profile.training_export_opt_in} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/settings/fine-tune/db-pairs") +def list_db_pairs(): + _training_opt_in_required() + try: + from scripts.db import get_db_pairs + db_path = Path(_request_db.get() or DB_PATH) + pairs = get_db_pairs(db_path) + excluded_count = sum(1 for p in pairs if p["excluded"]) + return { + "pairs": pairs, + "total": len(pairs), + "excluded_count": excluded_count, + } + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/exclude") +def exclude_db_pair(job_id: int): + _training_opt_in_required() + try: + from scripts.db import set_training_exclusion + set_training_exclusion(Path(_request_db.get() or DB_PATH), job_id, excluded=True) + return {"ok": True, "job_id": job_id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/include") +def include_db_pair(job_id: int): + _training_opt_in_required() + try: + from scripts.db import set_training_exclusion + set_training_exclusion(Path(_request_db.get() or DB_PATH), job_id, excluded=False) + return {"ok": True, "job_id": job_id} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/settings/fine-tune/export") +def export_training_jsonl(): + _training_opt_in_required() + import json as _json + from fastapi.responses import StreamingResponse + from scripts.db import get_training_pairs + + db_path = Path(_request_db.get() or DB_PATH) + db_pairs = get_training_pairs(db_path) + file_pairs = _load_training_pairs() + + def _generate(): + for pair in db_pairs: + yield _json.dumps(pair, ensure_ascii=False) + "\n" + for pair in file_pairs: + record = dict(pair) + record.setdefault("source", "file") + yield _json.dumps(record, ensure_ascii=False) + "\n" + + return StreamingResponse( + _generate(), + media_type="application/x-ndjson", + headers={"Content-Disposition": 'attachment; filename="peregrine_training_pairs.jsonl"'}, + ) + + +# Phase 2 stubs — reserved, not yet implemented +@app.post("/api/settings/fine-tune/cloud-request") +def cloud_finetune_request(): + raise HTTPException(status_code=501, detail="Cloud fine-tune is not yet available.") + + +@app.get("/api/settings/fine-tune/cloud-status") +def cloud_finetune_status(): + raise HTTPException(status_code=501, detail="Cloud fine-tune is not yet available.") + + # ── Settings: License ───────────────────────────────────────────────────────── # _config_dir() / _license_path() / _tokens_path() are per-request (see helpers above) diff --git a/tests/test_training_export.py b/tests/test_training_export.py index 23c8b8c..e5dbca9 100644 --- a/tests/test_training_export.py +++ b/tests/test_training_export.py @@ -139,3 +139,86 @@ def test_user_profile_training_opt_in_roundtrip(tmp_path): profile.save() reloaded = UserProfile(yaml_path) assert reloaded.training_export_opt_in is True + + +# ── API tests ───────────────────────────────────────────────────────────────── + +@pytest.fixture() +def api_client(tmp_path, monkeypatch): + """TestClient with a fresh DB and user.yaml for training export endpoints.""" + import yaml + from fastapi.testclient import TestClient + + db = _make_db(tmp_path) + yaml_path = tmp_path / "config" / "user.yaml" + yaml_path.parent.mkdir(parents=True) + yaml_path.write_text(yaml.dump({"name": "Test", "email": "t@t.com"})) + + monkeypatch.setenv("STAGING_DB", str(db)) + monkeypatch.setattr("dev_api.DB_PATH", str(db)) + monkeypatch.setattr("dev_api._user_yaml_path", lambda: str(yaml_path)) + + from dev_api import app + return TestClient(app), db, yaml_path + + +def test_opt_in_toggle(api_client): + client, db, yaml_path = api_client + resp = client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + assert resp.status_code == 200 + assert resp.json()["enabled"] is True + import yaml as _yaml + data = _yaml.safe_load(yaml_path.read_text()) + assert data["training_export_opt_in"] is True + + +def test_db_pairs_blocked_without_opt_in(api_client): + client, db, yaml_path = api_client + resp = client.get("/api/settings/fine-tune/db-pairs") + assert resp.status_code == 403 + + +def test_db_pairs_returns_jobs_when_opted_in(api_client): + client, db, yaml_path = api_client + _insert_job(db, title="Engineer", company="Acme") + client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + resp = client.get("/api/settings/fine-tune/db-pairs") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] >= 1 + assert data["pairs"][0]["title"] == "Engineer" + + +def test_exclude_and_restore(api_client): + client, db, yaml_path = api_client + job_id = _insert_job(db) + client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + resp = client.patch(f"/api/settings/fine-tune/db-pairs/{job_id}/exclude") + assert resp.status_code == 200 + pairs = client.get("/api/settings/fine-tune/db-pairs").json()["pairs"] + assert any(p["job_id"] == job_id and p["excluded"] for p in pairs) + client.patch(f"/api/settings/fine-tune/db-pairs/{job_id}/include") + pairs = client.get("/api/settings/fine-tune/db-pairs").json()["pairs"] + assert any(p["job_id"] == job_id and not p["excluded"] for p in pairs) + + +def test_export_jsonl_blocked_without_opt_in(api_client): + client, db, yaml_path = api_client + resp = client.get("/api/settings/fine-tune/export") + assert resp.status_code == 403 + + +def test_export_jsonl_streams_valid_records(api_client): + client, db, yaml_path = api_client + _insert_job(db, cover_letter="Dear Sir,\n\nGreat role body.", description="Build things.") + client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True}) + resp = client.get("/api/settings/fine-tune/export") + assert resp.status_code == 200 + assert "attachment" in resp.headers.get("content-disposition", "") + lines = [l for l in resp.text.strip().splitlines() if l] + assert len(lines) >= 1 + record = json.loads(lines[0]) + assert "instruction" in record + assert "input" in record + assert "output" in record + assert record["source"] == "db"