diff --git a/.env.example b/.env.example index bf458e0..b73fcaa 100644 --- a/.env.example +++ b/.env.example @@ -45,7 +45,8 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1 # Set CF_LICENSE_KEY to authenticate with the hosted coordinator. # Leave both blank for local self-hosted cf-orch or bare-metal inference. CF_LICENSE_KEY= -CF_ORCH_URL=https://orch.circuitforge.tech +GPU_SERVER_URL=https://orch.circuitforge.tech +# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL # cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*) # The agent registers this node with the cf-orch coordinator and reports VRAM stats. diff --git a/compose.cloud.yml b/compose.cloud.yml index 4d49ca8..a57bd7c 100644 --- a/compose.cloud.yml +++ b/compose.cloud.yml @@ -37,7 +37,8 @@ services: - HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN} - PYTHONUNBUFFERED=1 - FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-} - - CF_ORCH_URL=http://host.docker.internal:7700 + - GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700} + - CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}} - CF_APP_NAME=peregrine extra_hosts: - "host.docker.internal:host-gateway" diff --git a/compose.test-cfcore.yml b/compose.test-cfcore.yml index eea3d34..c7b0fbc 100644 --- a/compose.test-cfcore.yml +++ b/compose.test-cfcore.yml @@ -29,7 +29,8 @@ services: - STAGING_DB=/devl/job-seeker/staging.db - PYTHONUNBUFFERED=1 - STREAMLIT_SERVER_BASE_URL_PATH= - - CF_ORCH_URL=http://host.docker.internal:7700 + - GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700} + - CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}} extra_hosts: - "host.docker.internal:host-gateway" restart: "no" diff --git a/compose.yml b/compose.yml index 3e598e5..18c8860 100644 --- a/compose.yml +++ b/compose.yml @@ -20,7 +20,8 @@ services: - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-} - PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0} - 
PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-} - - CF_ORCH_URL=${CF_ORCH_URL:-http://host.docker.internal:7700} + - GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}} + - CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}} - CF_APP_NAME=peregrine - PYTHONUNBUFFERED=1 extra_hosts: diff --git a/config/llm.yaml.example b/config/llm.yaml.example index 9fa9506..ac39a7d 100644 --- a/config/llm.yaml.example +++ b/config/llm.yaml.example @@ -46,11 +46,61 @@ backends: type: vision_service supports_images: true - # ── cf-orch trunk services ───────────────────────────────────────────────── - # These backends allocate via cf-orch rather than connecting to a static URL. - # cf-orch starts the service on-demand and returns its URL; the router then - # calls it directly using the openai_compat path. - # Set CF_ORCH_URL (env) or url below; leave enabled: false if cf-orch is + # ── cf-orch task-routed backends (preferred for GPU inference) ──────────── + # Use these when GPU_SERVER_URL is configured. The coordinator resolves + # product+task → model_id → node via assignments.yaml; no model IDs needed here. + # Set enabled: true once GPU_SERVER_URL is configured. 
+ cf_cover_letter: + type: openai_compat + enabled: false + base_url: http://localhost:8008/v1 # fallback when cf-orch is unavailable + model: __auto__ + api_key: any + supports_images: false + cf_orch: + product: peregrine + task: cover_letter + ttl_s: 3600 + + cf_ats_rewrite: + type: openai_compat + enabled: false + base_url: http://localhost:8008/v1 + model: __auto__ + api_key: any + supports_images: false + cf_orch: + product: peregrine + task: ats_rewrite + ttl_s: 3600 + + cf_job_research: + type: openai_compat + enabled: false + base_url: http://localhost:8008/v1 + model: __auto__ + api_key: any + supports_images: false + cf_orch: + product: peregrine + task: job_research + ttl_s: 3600 + + cf_interview_prep: + type: openai_compat + enabled: false + base_url: http://localhost:8008/v1 + model: __auto__ + api_key: any + supports_images: false + cf_orch: + product: peregrine + task: interview_prep + ttl_s: 3600 + + # ── cf-orch trunk services (service-based, legacy) ───────────────────────── + # Generic service allocation — use the task-routed backends above when possible. + # Set GPU_SERVER_URL (env) or url below; leave enabled: false if cf-orch is # not deployed in your environment. cf_text: type: openai_compat diff --git a/dev-api.py b/dev-api.py index d8bfb56..eaf8094 100644 --- a/dev-api.py +++ b/dev-api.py @@ -48,6 +48,21 @@ _CLOUD_DATA_ROOT = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/menagerie-data _DIRECTUS_SECRET = os.environ.get("DIRECTUS_JWT_SECRET", "") IS_DEMO: bool = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes") +# Resolve GPU inference server URL. +# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → cloud default when licensed. +# Result is written back to CF_ORCH_URL so all downstream callers need no changes. 
def _clean_bullets(bullets) -> list[str]:
    """Return *bullets* with each item stripped; blank and non-string items are dropped."""
    return [b.strip() for b in (bullets or []) if isinstance(b, str) and b.strip()]


def _collect_review_corrections(draft: dict, decisions: dict) -> list[tuple]:
    """Compute the (section, proposed, accepted) pairs worth persisting.

    Pure function: reads *draft* and *decisions* only, performs no I/O.

    A pair is produced only when the decision is accepted (``accepted`` defaults
    to True), an edit is present, the edit is non-empty after stripping, and it
    differs from the proposed text after whitespace normalisation.

    Args:
        draft: Review draft; ``draft["sections"]`` is a list of dicts keyed by
            ``"section"`` (``"summary"`` carries ``"proposed"``, ``"experience"``
            carries ``"entries"`` with ``"title"``/``"company"``/``"proposed_bullets"``).
        decisions: User decisions; ``"summary"`` may carry ``"edited_text"`` and
            ``"experience"`` may carry ``"accepted_entries"``, each with
            ``"edited_bullets"``.

    Returns:
        A list of ``(section_key, proposed, accepted)`` tuples. For the summary
        both values are strings; for experience entries both are lists of bullets
        and ``section_key`` is ``"experience:<title>|<company>"``.
    """
    # ``x.get(k) or {}`` rather than ``x.get(k, {})``: a JSON null arrives as an
    # explicit None value, which the default argument would not replace.
    sections = {
        s["section"]: s
        for s in ((draft or {}).get("sections") or [])
        if isinstance(s, dict) and "section" in s
    }
    decisions = decisions or {}
    corrections: list[tuple] = []

    # ── Summary correction ────────────────────────────────────────────────────
    summary_dec = decisions.get("summary") or {}
    if summary_dec.get("accepted", True):
        edited_text = summary_dec.get("edited_text")
        proposed_summary = (sections.get("summary") or {}).get("proposed") or ""
        if isinstance(edited_text, str):
            accepted_summary = edited_text.strip()
            # An empty accepted text is not a usable fine-tuning target.
            if accepted_summary and accepted_summary != proposed_summary.strip():
                corrections.append(("summary", proposed_summary, accepted_summary))

    # ── Experience bullet corrections ─────────────────────────────────────────
    exp_sec = sections.get("experience") or {}
    # Keyed the same way on both sides (.get with '' default) so an entry that
    # lacks title/company cannot raise KeyError here.
    proposed_by_key = {
        f"{e.get('title', '')}|{e.get('company', '')}": e
        for e in (exp_sec.get("entries") or [])
        if isinstance(e, dict)
    }
    exp_dec = decisions.get("experience") or {}
    for entry_dec in exp_dec.get("accepted_entries") or []:
        if not isinstance(entry_dec, dict) or not entry_dec.get("accepted", True):
            continue
        edited_bullets = entry_dec.get("edited_bullets")
        if edited_bullets is None:
            continue  # user accepted the proposal verbatim
        key = f"{entry_dec.get('title', '')}|{entry_dec.get('company', '')}"
        proposed_entry = proposed_by_key.get(key)
        if proposed_entry is None:
            continue  # decision refers to an entry the draft does not contain
        proposed_bullets = proposed_entry.get("proposed_bullets") or []
        accepted_bullets = _clean_bullets(edited_bullets)
        # Compare whitespace-normalised bullets, mirroring the summary check, so
        # a stray trailing space is not recorded as a user correction.
        if accepted_bullets and accepted_bullets != _clean_bullets(proposed_bullets):
            corrections.append((f"experience:{key}", proposed_bullets, accepted_bullets))

    return corrections


def _capture_review_corrections(
    db_path: Path,
    job_id: int,
    draft: dict,
    decisions: dict,
) -> None:
    """Persist (proposed, accepted) pairs when the user edits LLM output in the review UI.

    Only saves corrections where accepted=True AND the user actually modified the
    proposed text (whitespace-only differences and blank edits are ignored).
    Rejections carry no training signal.

    Args:
        db_path: Database path forwarded to ``save_resume_correction``.
        job_id: Job the review belongs to.
        draft: Review draft containing the proposed sections.
        decisions: The user's per-section decisions from the review UI.
    """
    # Imported lazily, as in the rest of this module's review helpers.
    from scripts.db import save_resume_correction as _save_correction

    for section, proposed, accepted in _collect_review_corrections(draft, decisions):
        _save_correction(db_path, job_id, section, proposed, accepted)
+ """ + from scripts.db import get_resume_corrections as _get_corrections + db_path = Path(_request_db.get() or DB_PATH) + return {"corrections": _get_corrections(db_path, limit=limit, job_id=job_id)} + + @app.post("/api/jobs/{job_id}/resume_optimizer/approve") def approve_resume(job_id: int, body: dict): """Save the user-approved assembled resume struct and mark the task complete.