feat: add training export API endpoints to dev_api.py
- PATCH /api/settings/fine-tune/opt-in — toggle training_export_opt_in in user.yaml
- GET /api/settings/fine-tune/db-pairs — list DB jobs with exclusion flags (403 without opt-in)
- PATCH /api/settings/fine-tune/db-pairs/{id}/exclude|include — per-job exclusion toggle
- GET /api/settings/fine-tune/export — NDJSON streaming download of all training pairs (DB + file)
- POST/GET /api/settings/fine-tune/cloud-request|cloud-status — Phase 2 stubs (501)
- finetune_status now includes opted_in field
- 6 new API tests; all 17 tests pass
This commit is contained in:
parent
3b52844382
commit
25473aef77
2 changed files with 200 additions and 1 deletions
118
dev-api.py
118
dev-api.py
|
|
@ -3581,8 +3581,13 @@ def finetune_status():
|
|||
db_count = task.get("result_count", 0) or 0
|
||||
pairs_count = max(pairs_count, db_count)
|
||||
status = task.get("status", "idle") if task else "idle"
|
||||
try:
|
||||
from scripts.user_profile import UserProfile
|
||||
_opted_in = UserProfile(Path(_user_yaml_path())).training_export_opt_in
|
||||
except Exception:
|
||||
_opted_in = False
|
||||
# Stub quota for self-hosted; cloud overrides via its own middleware
|
||||
return {"status": status, "pairs_count": pairs_count, "quota_remaining": None}
|
||||
return {"status": status, "pairs_count": pairs_count, "quota_remaining": None, "opted_in": _opted_in}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
|
@ -3663,6 +3668,117 @@ def finetune_local_status():
|
|||
return {"model_ready": False}
|
||||
|
||||
|
||||
# ── Settings: Fine-Tune — Training Export ─────────────────────────────────────
|
||||
|
||||
# Request payload for PATCH /api/settings/fine-tune/opt-in.
class TrainingOptInBody(BaseModel):
    # Desired value of the training-export opt-in flag.
    enabled: bool
|
||||
|
||||
|
||||
def _training_opt_in_required() -> None:
    """Guard for training-export endpoints: reject callers who have not opted in.

    Loads the user profile from ``_user_yaml_path()`` and inspects its
    ``training_export_opt_in`` attribute.

    Raises:
        HTTPException: 403 when the flag is falsy, or when loading the profile
            raises ``FileNotFoundError``. Any other error propagates unchanged.
    """
    try:
        from scripts.user_profile import UserProfile

        opted_in = bool(UserProfile(Path(_user_yaml_path())).training_export_opt_in)
    except FileNotFoundError:
        # A missing profile file is treated the same as "not opted in".
        opted_in = False

    if opted_in:
        return
    raise HTTPException(
        status_code=403,
        detail="Training export is not enabled. Enable it in Settings → Fine-Tune.",
    )
|
||||
|
||||
|
||||
@app.patch("/api/settings/fine-tune/opt-in")
def set_training_opt_in(body: TrainingOptInBody):
    """Store the requested training-export opt-in value in the user profile.

    Sets ``training_export_opt_in`` to ``body.enabled`` on the profile loaded
    from ``_user_yaml_path()``, saves it, and returns ``{"ok": True,
    "enabled": <value read back from the profile>}``.

    Raises:
        HTTPException: 500 carrying ``str(error)`` for any failure.
    """
    try:
        from scripts.user_profile import UserProfile

        user_profile = UserProfile(Path(_user_yaml_path()))
        user_profile.training_export_opt_in = body.enabled
        user_profile.save()
        # Echo the value as the profile object now reports it.
        response = {"ok": True, "enabled": user_profile.training_export_opt_in}
        return response
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@app.get("/api/settings/fine-tune/db-pairs")
def list_db_pairs():
    """List the DB pairs with their exclusion flags, plus summary counts.

    The opt-in guard runs first, outside the ``try``, so its 403 is not
    rewritten into a 500.

    Returns:
        A dict with ``pairs`` (the result of ``get_db_pairs``), ``total`` and
        ``excluded_count`` (entries whose ``"excluded"`` value is truthy).

    Raises:
        HTTPException: 403 from the opt-in guard; 500 for any other failure.
    """
    _training_opt_in_required()
    try:
        from scripts.db import get_db_pairs

        rows = get_db_pairs(Path(_request_db.get() or DB_PATH))
        n_excluded = len([row for row in rows if row["excluded"]])
        return {"pairs": rows, "total": len(rows), "excluded_count": n_excluded}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/exclude")
def exclude_db_pair(job_id: int):
    """Flag one job as excluded from training export.

    Args:
        job_id: Identifier taken from the URL path.

    Returns:
        ``{"ok": True, "job_id": job_id}`` once ``set_training_exclusion`` returns.

    Raises:
        HTTPException: 403 from the opt-in guard; 500 for any other failure.
    """
    _training_opt_in_required()
    try:
        from scripts.db import set_training_exclusion

        target_db = Path(_request_db.get() or DB_PATH)
        set_training_exclusion(target_db, job_id, excluded=True)
        return {"ok": True, "job_id": job_id}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/include")
def include_db_pair(job_id: int):
    """Clear the training-export exclusion flag on one job.

    Args:
        job_id: Identifier taken from the URL path.

    Returns:
        ``{"ok": True, "job_id": job_id}`` once ``set_training_exclusion`` returns.

    Raises:
        HTTPException: 403 from the opt-in guard; 500 for any other failure.
    """
    _training_opt_in_required()
    try:
        from scripts.db import set_training_exclusion

        target_db = Path(_request_db.get() or DB_PATH)
        set_training_exclusion(target_db, job_id, excluded=False)
        return {"ok": True, "job_id": job_id}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@app.get("/api/settings/fine-tune/export")
def export_training_jsonl():
    """Serve the training pairs as a newline-delimited JSON attachment.

    Both sources are loaded before the response is built: DB pairs first, via
    ``get_training_pairs``, then file pairs via ``_load_training_pairs``. DB
    records are emitted as returned; file records are copied and given
    ``"source": "file"`` only when they carry no ``"source"`` key.

    Raises:
        HTTPException: 403 from the opt-in guard.
    """
    _training_opt_in_required()
    import json as _json

    from fastapi.responses import StreamingResponse
    from scripts.db import get_training_pairs

    db_records = get_training_pairs(Path(_request_db.get() or DB_PATH))
    file_records = _load_training_pairs()

    def _ndjson_lines():
        # One JSON document per line; non-ASCII text is written unescaped.
        for rec in db_records:
            yield _json.dumps(rec, ensure_ascii=False) + "\n"
        for rec in file_records:
            tagged = dict(rec)  # copy so the loaded record is not mutated
            if "source" not in tagged:
                tagged["source"] = "file"
            yield _json.dumps(tagged, ensure_ascii=False) + "\n"

    download_headers = {
        "Content-Disposition": 'attachment; filename="peregrine_training_pairs.jsonl"',
    }
    return StreamingResponse(
        _ndjson_lines(),
        media_type="application/x-ndjson",
        headers=download_headers,
    )
|
||||
|
||||
|
||||
# Phase 2 stubs — reserved, not yet implemented
|
||||
@app.post("/api/settings/fine-tune/cloud-request")
def cloud_finetune_request():
    """Placeholder endpoint: always answers 501 Not Implemented."""
    not_available = "Cloud fine-tune is not yet available."
    raise HTTPException(status_code=501, detail=not_available)
|
||||
|
||||
|
||||
@app.get("/api/settings/fine-tune/cloud-status")
def cloud_finetune_status():
    """Placeholder endpoint: always answers 501 Not Implemented."""
    not_available = "Cloud fine-tune is not yet available."
    raise HTTPException(status_code=501, detail=not_available)
|
||||
|
||||
|
||||
# ── Settings: License ─────────────────────────────────────────────────────────
|
||||
|
||||
# _config_dir() / _license_path() / _tokens_path() are per-request (see helpers above)
|
||||
|
|
|
|||
|
|
@ -139,3 +139,86 @@ def test_user_profile_training_opt_in_roundtrip(tmp_path):
|
|||
profile.save()
|
||||
reloaded = UserProfile(yaml_path)
|
||||
assert reloaded.training_export_opt_in is True
|
||||
|
||||
|
||||
# ── API tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.fixture()
def api_client(tmp_path, monkeypatch):
    """Provide ``(TestClient, db, yaml_path)`` isolated under ``tmp_path``.

    Creates a database with ``_make_db``, writes a minimal ``config/user.yaml``
    with no opt-in key, and points ``dev_api`` at both through
    ``monkeypatch`` before importing the app.
    """
    import yaml
    from fastapi.testclient import TestClient

    db = _make_db(tmp_path)

    config_dir = tmp_path / "config"
    config_dir.mkdir(parents=True)
    yaml_path = config_dir / "user.yaml"
    yaml_path.write_text(yaml.dump({"name": "Test", "email": "t@t.com"}))

    # Redirect the API module to the temporary DB and profile.
    monkeypatch.setenv("STAGING_DB", str(db))
    monkeypatch.setattr("dev_api.DB_PATH", str(db))
    monkeypatch.setattr("dev_api._user_yaml_path", lambda: str(yaml_path))

    from dev_api import app

    test_client = TestClient(app)
    return test_client, db, yaml_path
|
||||
|
||||
|
||||
def test_opt_in_toggle(api_client):
    """Enabling opt-in returns 200 and writes the flag into user.yaml."""
    import yaml as _yaml

    client, _db, yaml_path = api_client

    response = client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})
    assert response.status_code == 200
    assert response.json()["enabled"] is True

    # The flag must also be persisted on disk.
    stored = _yaml.safe_load(yaml_path.read_text())
    assert stored["training_export_opt_in"] is True
|
||||
|
||||
|
||||
def test_db_pairs_blocked_without_opt_in(api_client):
    """The db-pairs listing answers 403 when the profile has no opt-in."""
    client, _db, _yaml_path = api_client
    response = client.get("/api/settings/fine-tune/db-pairs")
    assert response.status_code == 403
|
||||
|
||||
|
||||
def test_db_pairs_returns_jobs_when_opted_in(api_client):
    """After opting in, the listing includes the job that was inserted."""
    client, db, _yaml_path = api_client
    _insert_job(db, title="Engineer", company="Acme")
    client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})

    response = client.get("/api/settings/fine-tune/db-pairs")

    assert response.status_code == 200
    payload = response.json()
    assert payload["total"] >= 1
    first_pair = payload["pairs"][0]
    assert first_pair["title"] == "Engineer"
|
||||
|
||||
|
||||
def test_exclude_and_restore(api_client):
    """Excluding then re-including a job flips its ``excluded`` flag both ways."""
    client, db, _yaml_path = api_client
    job_id = _insert_job(db)
    client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})
    pair_url = f"/api/settings/fine-tune/db-pairs/{job_id}"

    exclude_resp = client.patch(f"{pair_url}/exclude")
    assert exclude_resp.status_code == 200
    listed = client.get("/api/settings/fine-tune/db-pairs").json()["pairs"]
    assert any(entry["job_id"] == job_id and entry["excluded"] for entry in listed)

    client.patch(f"{pair_url}/include")
    listed = client.get("/api/settings/fine-tune/db-pairs").json()["pairs"]
    assert any(entry["job_id"] == job_id and not entry["excluded"] for entry in listed)
|
||||
|
||||
|
||||
def test_export_jsonl_blocked_without_opt_in(api_client):
    """The export download answers 403 when the profile has no opt-in."""
    client, _db, _yaml_path = api_client
    response = client.get("/api/settings/fine-tune/export")
    assert response.status_code == 403
|
||||
|
||||
|
||||
def test_export_jsonl_streams_valid_records(api_client):
    """The export is an attachment whose first line is a DB-sourced record."""
    client, db, _yaml_path = api_client
    _insert_job(db, cover_letter="Dear Sir,\n\nGreat role body.", description="Build things.")
    client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})

    response = client.get("/api/settings/fine-tune/export")

    assert response.status_code == 200
    assert "attachment" in response.headers.get("content-disposition", "")

    # Drop empty lines before parsing the NDJSON body.
    non_empty = list(filter(None, response.text.strip().splitlines()))
    assert len(non_empty) >= 1

    first_record = json.loads(non_empty[0])
    for field in ("instruction", "input", "output"):
        assert field in first_record
    assert first_record["source"] == "db"
|
||||
|
|
|
|||
Loading…
Reference in a new issue