feat(config): GPU_SERVER_URL + cf-orch task-routed backends
- Rename the user-facing env var CF_ORCH_URL → GPU_SERVER_URL with a full backward-compat alias (closes #116). Priority chain: GPU_SERVER_URL → CF_ORCH_URL → orch.circuitforge.tech when CF_LICENSE_KEY is present. The resolved value is written back to os.environ["CF_ORCH_URL"] so all downstream callers stay unchanged.
- Add four task-routed llm.yaml backends (cf_cover_letter, cf_ats_rewrite, cf_job_research, cf_interview_prep) using cf_orch.product + cf_orch.task. The coordinator resolves model/node from assignments.yaml (closes #115).
- Update compose.yml, compose.cloud.yml, compose.test-cfcore.yml and .env.example to use GPU_SERVER_URL as the primary documented var.
This commit is contained in:
parent
5c4992dbeb
commit
0d6ddd35cf
6 changed files with 140 additions and 9 deletions
|
|
@ -45,7 +45,8 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
||||||
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
||||||
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
|
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
|
||||||
CF_LICENSE_KEY=
|
CF_LICENSE_KEY=
|
||||||
CF_ORCH_URL=https://orch.circuitforge.tech
|
GPU_SERVER_URL=https://orch.circuitforge.tech
|
||||||
|
# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL
|
||||||
|
|
||||||
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
||||||
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,8 @@ services:
|
||||||
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
|
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
|
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
|
||||||
- CF_ORCH_URL=http://host.docker.internal:7700
|
- GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
|
||||||
|
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
|
||||||
- CF_APP_NAME=peregrine
|
- CF_APP_NAME=peregrine
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,8 @@ services:
|
||||||
- STAGING_DB=/devl/job-seeker/staging.db
|
- STAGING_DB=/devl/job-seeker/staging.db
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
- STREAMLIT_SERVER_BASE_URL_PATH=
|
- STREAMLIT_SERVER_BASE_URL_PATH=
|
||||||
- CF_ORCH_URL=http://host.docker.internal:7700
|
- GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
|
||||||
|
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
restart: "no"
|
restart: "no"
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,8 @@ services:
|
||||||
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||||
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
|
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
|
||||||
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
|
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
|
||||||
- CF_ORCH_URL=${CF_ORCH_URL:-http://host.docker.internal:7700}
|
- GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
|
||||||
|
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
|
||||||
- CF_APP_NAME=peregrine
|
- CF_APP_NAME=peregrine
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
|
|
|
||||||
|
|
@ -46,11 +46,61 @@ backends:
|
||||||
type: vision_service
|
type: vision_service
|
||||||
supports_images: true
|
supports_images: true
|
||||||
|
|
||||||
# ── cf-orch trunk services ─────────────────────────────────────────────────
|
# ── cf-orch task-routed backends (preferred for GPU inference) ────────────
|
||||||
# These backends allocate via cf-orch rather than connecting to a static URL.
|
# Use these when GPU_SERVER_URL is configured. The coordinator resolves
|
||||||
# cf-orch starts the service on-demand and returns its URL; the router then
|
# product+task → model_id → node via assignments.yaml; no model IDs needed here.
|
||||||
# calls it directly using the openai_compat path.
|
# Set enabled: true once GPU_SERVER_URL is configured.
|
||||||
# Set CF_ORCH_URL (env) or url below; leave enabled: false if cf-orch is
|
cf_cover_letter:
|
||||||
|
type: openai_compat
|
||||||
|
enabled: false
|
||||||
|
base_url: http://localhost:8008/v1 # fallback when cf-orch is unavailable
|
||||||
|
model: __auto__
|
||||||
|
api_key: any
|
||||||
|
supports_images: false
|
||||||
|
cf_orch:
|
||||||
|
product: peregrine
|
||||||
|
task: cover_letter
|
||||||
|
ttl_s: 3600
|
||||||
|
|
||||||
|
cf_ats_rewrite:
|
||||||
|
type: openai_compat
|
||||||
|
enabled: false
|
||||||
|
base_url: http://localhost:8008/v1
|
||||||
|
model: __auto__
|
||||||
|
api_key: any
|
||||||
|
supports_images: false
|
||||||
|
cf_orch:
|
||||||
|
product: peregrine
|
||||||
|
task: ats_rewrite
|
||||||
|
ttl_s: 3600
|
||||||
|
|
||||||
|
cf_job_research:
|
||||||
|
type: openai_compat
|
||||||
|
enabled: false
|
||||||
|
base_url: http://localhost:8008/v1
|
||||||
|
model: __auto__
|
||||||
|
api_key: any
|
||||||
|
supports_images: false
|
||||||
|
cf_orch:
|
||||||
|
product: peregrine
|
||||||
|
task: job_research
|
||||||
|
ttl_s: 3600
|
||||||
|
|
||||||
|
cf_interview_prep:
|
||||||
|
type: openai_compat
|
||||||
|
enabled: false
|
||||||
|
base_url: http://localhost:8008/v1
|
||||||
|
model: __auto__
|
||||||
|
api_key: any
|
||||||
|
supports_images: false
|
||||||
|
cf_orch:
|
||||||
|
product: peregrine
|
||||||
|
task: interview_prep
|
||||||
|
ttl_s: 3600
|
||||||
|
|
||||||
|
# ── cf-orch trunk services (service-based, legacy) ─────────────────────────
|
||||||
|
# Generic service allocation — use the task-routed backends above when possible.
|
||||||
|
# Set GPU_SERVER_URL (env) or url below; leave enabled: false if cf-orch is
|
||||||
# not deployed in your environment.
|
# not deployed in your environment.
|
||||||
cf_text:
|
cf_text:
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
|
|
|
||||||
77
dev-api.py
77
dev-api.py
|
|
@ -48,6 +48,21 @@ _CLOUD_DATA_ROOT = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/menagerie-data
|
||||||
_DIRECTUS_SECRET = os.environ.get("DIRECTUS_JWT_SECRET", "")
|
_DIRECTUS_SECRET = os.environ.get("DIRECTUS_JWT_SECRET", "")
|
||||||
IS_DEMO: bool = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
IS_DEMO: bool = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
|
# Resolve GPU inference server URL.
|
||||||
|
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → cloud default when licensed.
|
||||||
|
# Result is written back to CF_ORCH_URL so all downstream callers need no changes.
|
||||||
|
_GPU_SERVER_URL: str | None = (
|
||||||
|
os.environ.get("GPU_SERVER_URL")
|
||||||
|
or os.environ.get("CF_ORCH_URL")
|
||||||
|
or (
|
||||||
|
"https://orch.circuitforge.tech"
|
||||||
|
if os.environ.get("CF_LICENSE_KEY")
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if _GPU_SERVER_URL:
|
||||||
|
os.environ["CF_ORCH_URL"] = _GPU_SERVER_URL
|
||||||
|
|
||||||
# Per-request DB path — set by cloud_session_middleware; falls back to DB_PATH
|
# Per-request DB path — set by cloud_session_middleware; falls back to DB_PATH
|
||||||
_request_db: ContextVar[str | None] = ContextVar("_request_db", default=None)
|
_request_db: ContextVar[str | None] = ContextVar("_request_db", default=None)
|
||||||
|
|
||||||
|
|
@ -636,6 +651,51 @@ def resume_optimizer_task_status(job_id: int):
|
||||||
return {"status": row["status"], "stage": row["stage"], "message": row["error"]}
|
return {"status": row["status"], "stage": row["stage"], "message": row["error"]}
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_review_corrections(
    db_path: Path,
    job_id: int,
    draft: dict,
    decisions: dict,
) -> None:
    """Persist (proposed, accepted) pairs when the user edits LLM output in the review UI.

    A correction is saved only when the decision is accepted (a missing
    ``accepted`` flag counts as accepted) AND the user's text differs from the
    proposed text after surrounding whitespace is stripped. Rejections carry
    no training signal and are skipped.

    Missing or null sections, decisions and fields are treated as empty
    instead of raising.

    Args:
        db_path: Database path passed through to ``save_resume_correction``.
        job_id: Job the review belongs to.
        draft: Review draft; its ``sections`` list is read for the ``summary``
            and ``experience`` proposals.
        decisions: Per-section user decisions; ``summary`` and
            ``experience.accepted_entries`` are read.
    """
    from scripts.db import save_resume_correction as _save_correction

    # Index the draft's sections by name, ignoring malformed items.
    sections = {
        s["section"]: s
        for s in (draft.get("sections") or [])
        if isinstance(s, dict) and "section" in s
    }

    # ── Summary correction ────────────────────────────────────────────────────
    # `or {}` (not a .get default) so an explicit null decision is also covered.
    summary_dec = decisions.get("summary") or {}
    if summary_dec.get("accepted", True):
        edited_text = summary_dec.get("edited_text")
        proposed_summary = (sections.get("summary") or {}).get("proposed") or ""
        if edited_text is not None and edited_text.strip() != proposed_summary.strip():
            _save_correction(db_path, job_id, "summary", proposed_summary, edited_text.strip())

    # ── Experience bullet corrections ─────────────────────────────────────────
    exp_sec = sections.get("experience") or {}
    # Key draft entries the same way decisions are keyed below ("title|company"),
    # using .get on both sides so a missing field cannot raise or mismatch.
    entry_diffs = {
        f"{e.get('title', '')}|{e.get('company', '')}": e
        for e in (exp_sec.get("entries") or [])
    }
    experience_dec = decisions.get("experience") or {}
    for entry_dec in experience_dec.get("accepted_entries") or []:
        if not entry_dec.get("accepted", True):
            continue
        edited_bullets = entry_dec.get("edited_bullets")
        if edited_bullets is None:
            # The user accepted the proposal verbatim: nothing to learn from.
            continue
        key = f"{entry_dec.get('title', '')}|{entry_dec.get('company', '')}"
        diff = entry_diffs.get(key)
        if diff is None:
            continue
        proposed_bullets = diff.get("proposed_bullets") or []
        # Drop blank bullets and strip the rest so that whitespace-only
        # differences are not recorded as corrections (mirrors the summary branch).
        cleaned = [b.strip() for b in edited_bullets if b and b.strip()]
        normalized_proposed = [p.strip() for p in proposed_bullets if p and p.strip()]
        if cleaned != normalized_proposed:
            _save_correction(db_path, job_id, f"experience:{key}", proposed_bullets, cleaned)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/jobs/{job_id}/resume_optimizer/review")
|
@app.get("/api/jobs/{job_id}/resume_optimizer/review")
|
||||||
def get_resume_review(job_id: int):
|
def get_resume_review(job_id: int):
|
||||||
"""Return the pending review draft for this job (populated when task is awaiting_review)."""
|
"""Return the pending review draft for this job (populated when task is awaiting_review)."""
|
||||||
|
|
@ -692,6 +752,10 @@ def preview_resume_review(job_id: int, body: ResumeReviewBody):
|
||||||
# Step 1: apply section-level decisions
|
# Step 1: apply section-level decisions
|
||||||
struct = apply_review_decisions(draft, body.decisions)
|
struct = apply_review_decisions(draft, body.decisions)
|
||||||
|
|
||||||
|
# Step 1b: capture (proposed, accepted) correction pairs for Avocet fine-tuning.
|
||||||
|
# Only fires when accepted=True and the user actually edited the LLM output.
|
||||||
|
_capture_review_corrections(db_path, job_id, draft, body.decisions)
|
||||||
|
|
||||||
# Step 2: inject gap framing for rejected skills (adjacent / learning)
|
# Step 2: inject gap framing for rejected skills (adjacent / learning)
|
||||||
framings = [f.model_dump() for f in body.gap_framings if f.mode in ("adjacent", "learning")]
|
framings = [f.model_dump() for f in body.gap_framings if f.mode in ("adjacent", "learning")]
|
||||||
if framings:
|
if framings:
|
||||||
|
|
@ -713,6 +777,19 @@ def preview_resume_review(job_id: int, body: ResumeReviewBody):
|
||||||
return {"preview_text": preview_text, "preview_struct": struct}
|
return {"preview_text": preview_text, "preview_struct": struct}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/resume_optimizer/corrections")
def list_resume_corrections(job_id: int | None = None, limit: int = 200):
    """List stored resume review correction pairs for Avocet import.

    Every record is a (proposed, accepted) pair captured in the review UI
    when the user edited the LLM output before accepting it. They serve as
    SFT (supervised fine-tuning) candidates that go through Avocet for
    human review.

    ``job_id`` and ``limit`` are forwarded unchanged to
    ``get_resume_corrections``; the result is wrapped under the
    ``"corrections"`` key.
    """
    from scripts.db import get_resume_corrections as _get_corrections

    # Prefer the per-request database when one is set; otherwise use DB_PATH.
    active_db = _request_db.get() or DB_PATH
    rows = _get_corrections(Path(active_db), limit=limit, job_id=job_id)
    return {"corrections": rows}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/jobs/{job_id}/resume_optimizer/approve")
|
@app.post("/api/jobs/{job_id}/resume_optimizer/approve")
|
||||||
def approve_resume(job_id: int, body: dict):
|
def approve_resume(job_id: int, body: dict):
|
||||||
"""Save the user-approved assembled resume struct and mark the task complete.
|
"""Save the user-approved assembled resume struct and mark the task complete.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue