feat: cover letter training export (Phase 1) #111

Merged
pyr0ball merged 9 commits from feat/cover-letter-training-export into main 2026-05-04 08:55:16 -07:00
2 changed files with 200 additions and 1 deletion
Showing only changes of commit 25473aef77 - Show all commits

View file

@ -3581,8 +3581,13 @@ def finetune_status():
db_count = task.get("result_count", 0) or 0
pairs_count = max(pairs_count, db_count)
status = task.get("status", "idle") if task else "idle"
try:
from scripts.user_profile import UserProfile
_opted_in = UserProfile(Path(_user_yaml_path())).training_export_opt_in
except Exception:
_opted_in = False
# Stub quota for self-hosted; cloud overrides via its own middleware
return {"status": status, "pairs_count": pairs_count, "quota_remaining": None}
return {"status": status, "pairs_count": pairs_count, "quota_remaining": None, "opted_in": _opted_in}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@ -3663,6 +3668,117 @@ def finetune_local_status():
return {"model_ready": False}
# ── Settings: Fine-Tune — Training Export ─────────────────────────────────────
class TrainingOptInBody(BaseModel):
    """JSON request body for the training-export opt-in toggle endpoint."""

    # Desired opt-in state; the PATCH handler copies it onto the user profile.
    enabled: bool
def _training_opt_in_required() -> None:
    """Gate for training-export endpoints: raise 403 unless the user opted in.

    A ``FileNotFoundError`` while loading the profile is treated the same as
    an opt-in that is switched off. Any other exception propagates unchanged.

    Raises:
        HTTPException: status 403 when ``training_export_opt_in`` is falsy or
            the profile could not be found.
    """
    try:
        from scripts.user_profile import UserProfile

        allowed = bool(UserProfile(Path(_user_yaml_path())).training_export_opt_in)
    except FileNotFoundError:
        allowed = False

    if allowed:
        return

    raise HTTPException(
        status_code=403,
        detail="Training export is not enabled. Enable it in Settings → Fine-Tune.",
    )
@app.patch("/api/settings/fine-tune/opt-in")
def set_training_opt_in(body: TrainingOptInBody):
    """Store the requested training-export opt-in flag on the user profile.

    Returns ``{"ok": True, "enabled": <flag>}`` where the flag is read back
    from the profile object after ``save()``. Any failure is reported as
    HTTP 500 with the exception text as the detail.
    """
    try:
        from scripts.user_profile import UserProfile

        user_profile = UserProfile(Path(_user_yaml_path()))
        user_profile.training_export_opt_in = body.enabled
        user_profile.save()
        response = {"ok": True, "enabled": user_profile.training_export_opt_in}
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
    return response
@app.get("/api/settings/fine-tune/db-pairs")
def list_db_pairs():
    """List the database-backed training pairs with summary counts.

    Requires the training-export opt-in (403 otherwise). The response holds
    the pairs as returned by ``get_db_pairs``, their total, and how many have
    a truthy ``"excluded"`` value. Failures while loading are reported as
    HTTP 500 with the exception text as the detail.
    """
    _training_opt_in_required()
    try:
        from scripts.db import get_db_pairs

        rows = get_db_pairs(Path(_request_db.get() or DB_PATH))
        n_excluded = len([row for row in rows if row["excluded"]])
        return {"pairs": rows, "total": len(rows), "excluded_count": n_excluded}
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/exclude")
def exclude_db_pair(job_id: int):
    """Flag the pair for ``job_id`` as excluded from training export.

    Requires the training-export opt-in (403 otherwise). Delegates to
    ``set_training_exclusion(..., excluded=True)``; any failure there is
    reported as HTTP 500 with the exception text as the detail.
    """
    _training_opt_in_required()
    try:
        from scripts.db import set_training_exclusion

        target_db = Path(_request_db.get() or DB_PATH)
        set_training_exclusion(target_db, job_id, excluded=True)
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
    return {"ok": True, "job_id": job_id}
@app.patch("/api/settings/fine-tune/db-pairs/{job_id}/include")
def include_db_pair(job_id: int):
    """Clear the exclusion flag on the pair for ``job_id``.

    Requires the training-export opt-in (403 otherwise). Delegates to
    ``set_training_exclusion(..., excluded=False)``; any failure there is
    reported as HTTP 500 with the exception text as the detail.
    """
    _training_opt_in_required()
    try:
        from scripts.db import set_training_exclusion

        target_db = Path(_request_db.get() or DB_PATH)
        set_training_exclusion(target_db, job_id, excluded=False)
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
    return {"ok": True, "job_id": job_id}
@app.get("/api/settings/fine-tune/export")
def export_training_jsonl():
    """Stream all training pairs as a downloadable JSONL attachment.

    Requires the training-export opt-in (403 otherwise). Pairs returned by
    ``get_training_pairs`` for the request's database are emitted first,
    followed by the pairs from ``_load_training_pairs()``; each of the latter
    gets ``"source": "file"`` when it carries no ``"source"`` key of its own.

    Returns:
        StreamingResponse: one JSON object per line, served as
        ``application/x-ndjson`` with an attachment Content-Disposition.

    Raises:
        HTTPException: 403 from the opt-in gate; 500 (exception text as the
            detail) when loading the pairs fails, matching the error style of
            the other fine-tune endpoints.
    """
    _training_opt_in_required()
    import json as _json

    from fastapi.responses import StreamingResponse

    # Load everything before the response starts: once streaming has begun
    # the status code is already sent, so failures must surface here.
    try:
        from scripts.db import get_training_pairs

        db_path = Path(_request_db.get() or DB_PATH)
        db_pairs = get_training_pairs(db_path)
        file_pairs = _load_training_pairs()
    except HTTPException:
        # Preserve any deliberate HTTP error raised by a helper.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    def _generate():
        for pair in db_pairs:
            yield _json.dumps(pair, ensure_ascii=False) + "\n"
        for pair in file_pairs:
            # Copy so the loaded pair is not mutated by the default tag.
            record = dict(pair)
            record.setdefault("source", "file")
            yield _json.dumps(record, ensure_ascii=False) + "\n"

    return StreamingResponse(
        _generate(),
        media_type="application/x-ndjson",
        headers={"Content-Disposition": 'attachment; filename="peregrine_training_pairs.jsonl"'},
    )
# Phase 2 stubs — reserved, not yet implemented
@app.post("/api/settings/fine-tune/cloud-request")
def cloud_finetune_request():
    """Reserved Phase 2 endpoint; always answers HTTP 501."""
    not_available = HTTPException(
        status_code=501,
        detail="Cloud fine-tune is not yet available.",
    )
    raise not_available
@app.get("/api/settings/fine-tune/cloud-status")
def cloud_finetune_status():
    """Reserved Phase 2 endpoint; always answers HTTP 501."""
    not_available = HTTPException(
        status_code=501,
        detail="Cloud fine-tune is not yet available.",
    )
    raise not_available
# ── Settings: License ─────────────────────────────────────────────────────────
# _config_dir() / _license_path() / _tokens_path() are per-request (see helpers above)

View file

@ -139,3 +139,86 @@ def test_user_profile_training_opt_in_roundtrip(tmp_path):
profile.save()
reloaded = UserProfile(yaml_path)
assert reloaded.training_export_opt_in is True
# ── API tests ─────────────────────────────────────────────────────────────────
@pytest.fixture()
def api_client(tmp_path, monkeypatch):
    """Provide a TestClient wired to a throwaway DB and user.yaml.

    Yields nothing; returns a ``(client, db, yaml_path)`` tuple so tests can
    seed the database and inspect the profile file directly.
    """
    import yaml
    from fastapi.testclient import TestClient

    db = _make_db(tmp_path)

    # Minimal profile without any opt-in key, so endpoints start out gated.
    config_dir = tmp_path / "config"
    config_dir.mkdir(parents=True)
    yaml_path = config_dir / "user.yaml"
    yaml_path.write_text(yaml.dump({"name": "Test", "email": "t@t.com"}))

    # Point the API module at the temporary database and profile.
    db_str = str(db)
    monkeypatch.setenv("STAGING_DB", db_str)
    monkeypatch.setattr("dev_api.DB_PATH", db_str)
    monkeypatch.setattr("dev_api._user_yaml_path", lambda: str(yaml_path))

    from dev_api import app

    return TestClient(app), db, yaml_path
def test_opt_in_toggle(api_client):
    """Enabling the opt-in answers 200 and writes the flag to user.yaml."""
    import yaml as _yaml

    client, _db, yaml_path = api_client

    response = client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})
    assert response.status_code == 200
    assert response.json()["enabled"] is True

    saved = _yaml.safe_load(yaml_path.read_text())
    assert saved["training_export_opt_in"] is True
def test_db_pairs_blocked_without_opt_in(api_client):
    """Listing DB pairs is refused with 403 while the opt-in is off."""
    client = api_client[0]
    response = client.get("/api/settings/fine-tune/db-pairs")
    assert response.status_code == 403
def test_db_pairs_returns_jobs_when_opted_in(api_client):
    """After opting in, an inserted job shows up in the DB pairs listing."""
    client, db, _yaml_path = api_client
    _insert_job(db, title="Engineer", company="Acme")
    client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})

    response = client.get("/api/settings/fine-tune/db-pairs")
    assert response.status_code == 200

    payload = response.json()
    assert payload["total"] >= 1
    first_pair = payload["pairs"][0]
    assert first_pair["title"] == "Engineer"
def test_exclude_and_restore(api_client):
    """Excluding a pair flags it; including it again clears the flag."""
    client, db, _yaml_path = api_client
    job_id = _insert_job(db)
    client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})

    def _current_pairs():
        # Re-read the listing so each assertion sees the latest state.
        return client.get("/api/settings/fine-tune/db-pairs").json()["pairs"]

    exclude_resp = client.patch(f"/api/settings/fine-tune/db-pairs/{job_id}/exclude")
    assert exclude_resp.status_code == 200
    assert any(
        entry["job_id"] == job_id and entry["excluded"] for entry in _current_pairs()
    )

    client.patch(f"/api/settings/fine-tune/db-pairs/{job_id}/include")
    assert any(
        entry["job_id"] == job_id and not entry["excluded"] for entry in _current_pairs()
    )
def test_export_jsonl_blocked_without_opt_in(api_client):
    """The JSONL export is refused with 403 while the opt-in is off."""
    client = api_client[0]
    response = client.get("/api/settings/fine-tune/export")
    assert response.status_code == 403
def test_export_jsonl_streams_valid_records(api_client):
    """The export is an attachment whose first line is a DB-sourced record."""
    client, db, _yaml_path = api_client
    _insert_job(db, cover_letter="Dear Sir,\n\nGreat role body.", description="Build things.")
    client.patch("/api/settings/fine-tune/opt-in", json={"enabled": True})

    response = client.get("/api/settings/fine-tune/export")
    assert response.status_code == 200
    assert "attachment" in response.headers.get("content-disposition", "")

    # Drop blank lines so only real JSONL records are counted.
    jsonl_lines = [line for line in response.text.strip().splitlines() if line]
    assert len(jsonl_lines) >= 1

    record = json.loads(jsonl_lines[0])
    for required_key in ("instruction", "input", "output"):
        assert required_key in record
    assert record["source"] == "db"