diff --git a/.env.example b/.env.example
index bf458e0..b73fcaa 100644
--- a/.env.example
+++ b/.env.example
@@ -45,7 +45,8 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 # Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
 # Leave both blank for local self-hosted cf-orch or bare-metal inference.
 CF_LICENSE_KEY=
-CF_ORCH_URL=https://orch.circuitforge.tech
+GPU_SERVER_URL=https://orch.circuitforge.tech
+# CF_ORCH_URL is still accepted as a backward-compatible alias; GPU_SERVER_URL takes precedence when both are set.
 
 # cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
 # The agent registers this node with the cf-orch coordinator and reports VRAM stats.
diff --git a/compose.cloud.yml b/compose.cloud.yml
index 4d49ca8..a57bd7c 100644
--- a/compose.cloud.yml
+++ b/compose.cloud.yml
@@ -37,7 +37,8 @@ services:
       - HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
       - PYTHONUNBUFFERED=1
       - FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
-      - CF_ORCH_URL=http://host.docker.internal:7700
+      - GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
+      - CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
       - CF_APP_NAME=peregrine
     extra_hosts:
       - "host.docker.internal:host-gateway"
diff --git a/compose.test-cfcore.yml b/compose.test-cfcore.yml
index eea3d34..c7b0fbc 100644
--- a/compose.test-cfcore.yml
+++ b/compose.test-cfcore.yml
@@ -29,7 +29,8 @@ services:
       - STAGING_DB=/devl/job-seeker/staging.db
       - PYTHONUNBUFFERED=1
       - STREAMLIT_SERVER_BASE_URL_PATH=
-      - CF_ORCH_URL=http://host.docker.internal:7700
+      - GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
+      - CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
     extra_hosts:
       - "host.docker.internal:host-gateway"
     restart: "no"
diff --git a/compose.yml b/compose.yml
index 3e598e5..18c8860 100644
--- a/compose.yml
+++ b/compose.yml
@@ -20,7 +20,8 @@ services:
       - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
       - PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
       - PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
-      - CF_ORCH_URL=${CF_ORCH_URL:-http://host.docker.internal:7700}
+      - GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
+      - CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
       - CF_APP_NAME=peregrine
       - PYTHONUNBUFFERED=1
     extra_hosts:
diff --git a/config/llm.yaml.example b/config/llm.yaml.example
index 9fa9506..ac39a7d 100644
--- a/config/llm.yaml.example
+++ b/config/llm.yaml.example
@@ -46,11 +46,61 @@ backends:
     type: vision_service
     supports_images: true
 
-  # ── cf-orch trunk services ─────────────────────────────────────────────────
-  # These backends allocate via cf-orch rather than connecting to a static URL.
-  # cf-orch starts the service on-demand and returns its URL; the router then
-  # calls it directly using the openai_compat path.
-  # Set CF_ORCH_URL (env) or url below; leave enabled: false if cf-orch is
+  # ── cf-orch task-routed backends (preferred for GPU inference) ────────────
+  # Use these when GPU_SERVER_URL is configured. The coordinator resolves
+  # product+task → model_id → node via assignments.yaml; no model IDs needed here.
+  # Set enabled: true once GPU_SERVER_URL is configured.
+  cf_cover_letter:
+    type: openai_compat
+    enabled: false
+    base_url: http://localhost:8008/v1   # fallback when cf-orch is unavailable
+    model: __auto__
+    api_key: any
+    supports_images: false
+    cf_orch:
+      product: peregrine
+      task: cover_letter
+      ttl_s: 3600
+
+  cf_ats_rewrite:
+    type: openai_compat
+    enabled: false
+    base_url: http://localhost:8008/v1
+    model: __auto__
+    api_key: any
+    supports_images: false
+    cf_orch:
+      product: peregrine
+      task: ats_rewrite
+      ttl_s: 3600
+
+  cf_job_research:
+    type: openai_compat
+    enabled: false
+    base_url: http://localhost:8008/v1
+    model: __auto__
+    api_key: any
+    supports_images: false
+    cf_orch:
+      product: peregrine
+      task: job_research
+      ttl_s: 3600
+
+  cf_interview_prep:
+    type: openai_compat
+    enabled: false
+    base_url: http://localhost:8008/v1
+    model: __auto__
+    api_key: any
+    supports_images: false
+    cf_orch:
+      product: peregrine
+      task: interview_prep
+      ttl_s: 3600
+
+  # ── cf-orch trunk services (service-based, legacy) ─────────────────────────
+  # Generic service allocation — use the task-routed backends above when possible.
+  # Set GPU_SERVER_URL (env) or url below; leave enabled: false if cf-orch is
   # not deployed in your environment.
   cf_text:
     type: openai_compat
diff --git a/dev-api.py b/dev-api.py
index d8bfb56..eaf8094 100644
--- a/dev-api.py
+++ b/dev-api.py
@@ -48,6 +48,21 @@ _CLOUD_DATA_ROOT  = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/menagerie-data
 _DIRECTUS_SECRET  = os.environ.get("DIRECTUS_JWT_SECRET", "")
 IS_DEMO: bool = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
 
+# Resolve GPU inference server URL.
+# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → cloud default when licensed.
+# Result is written back to CF_ORCH_URL so all downstream callers need no changes.
+_GPU_SERVER_URL: str | None = (
+    os.environ.get("GPU_SERVER_URL")
+    or os.environ.get("CF_ORCH_URL")
+    or (
+        "https://orch.circuitforge.tech"
+        if os.environ.get("CF_LICENSE_KEY")
+        else None
+    )
+)
+if _GPU_SERVER_URL:
+    os.environ["CF_ORCH_URL"] = _GPU_SERVER_URL
+
 # Per-request DB path — set by cloud_session_middleware; falls back to DB_PATH
 _request_db: ContextVar[str | None] = ContextVar("_request_db", default=None)
 
@@ -636,6 +651,51 @@ def resume_optimizer_task_status(job_id: int):
     return {"status": row["status"], "stage": row["stage"], "message": row["error"]}
 
 
+def _capture_review_corrections(
+    db_path: Path,
+    job_id: int,
+    draft: dict,
+    decisions: dict,
+) -> None:
+    """Persist (proposed, accepted) pairs when the user edits LLM output in the review UI.
+
+    Only saves corrections where accepted=True AND the user actually modified the
+    proposed text (proposed != accepted).  Rejections carry no training signal.
+    """
+    from scripts.db import save_resume_correction as _save_correction
+
+    sections = {s["section"]: s for s in (draft.get("sections") or [])}
+
+    # ── Summary correction ────────────────────────────────────────────────────
+    summary_dec = decisions.get("summary", {})
+    if summary_dec.get("accepted", True):
+        edited_text = summary_dec.get("edited_text")
+        proposed_summary = sections.get("summary", {}).get("proposed", "")
+        if edited_text is not None and edited_text.strip() != proposed_summary.strip():
+            _save_correction(db_path, job_id, "summary", proposed_summary, edited_text.strip())
+
+    # ── Experience bullet corrections ─────────────────────────────────────────
+    exp_sec = sections.get("experience", {})
+    entry_diffs = {
+        f"{e['title']}|{e['company']}": e
+        for e in (exp_sec.get("entries") or [])
+    }
+    for entry_dec in (decisions.get("experience", {}).get("accepted_entries") or []):
+        if not entry_dec.get("accepted", True):
+            continue
+        edited_bullets = entry_dec.get("edited_bullets")
+        if edited_bullets is None:
+            continue
+        key = f"{entry_dec.get('title', '')}|{entry_dec.get('company', '')}"
+        diff = entry_diffs.get(key)
+        if diff is None:
+            continue
+        proposed_bullets = diff.get("proposed_bullets") or []
+        cleaned = [b for b in edited_bullets if b.strip()]
+        if cleaned != proposed_bullets:
+            _save_correction(db_path, job_id, f"experience:{key}", proposed_bullets, cleaned)
+
+
 @app.get("/api/jobs/{job_id}/resume_optimizer/review")
 def get_resume_review(job_id: int):
     """Return the pending review draft for this job (populated when task is awaiting_review)."""
@@ -692,6 +752,10 @@ def preview_resume_review(job_id: int, body: ResumeReviewBody):
     # Step 1: apply section-level decisions
     struct = apply_review_decisions(draft, body.decisions)
 
+    # Step 1b: capture (proposed, accepted) correction pairs for Avocet fine-tuning.
+    # Only fires when accepted=True and the user actually edited the LLM output.
+    _capture_review_corrections(db_path, job_id, draft, body.decisions)
+
     # Step 2: inject gap framing for rejected skills (adjacent / learning)
     framings = [f.model_dump() for f in body.gap_framings if f.mode in ("adjacent", "learning")]
     if framings:
@@ -713,6 +777,19 @@ def preview_resume_review(job_id: int, body: ResumeReviewBody):
     return {"preview_text": preview_text, "preview_struct": struct}
 
 
+@app.get("/api/resume_optimizer/corrections")
+def list_resume_corrections(job_id: int | None = None, limit: int = 200):
+    """Return resume review correction pairs for Avocet import.
+
+    Each record is a (proposed, accepted) pair from the review UI where the
+    user edited the LLM output before accepting.  These are SFT (supervised
+    fine-tuning) candidates that flow through Avocet for human review.
+    """
+    from scripts.db import get_resume_corrections as _get_corrections
+    db_path = Path(_request_db.get() or DB_PATH)
+    return {"corrections": _get_corrections(db_path, limit=limit, job_id=job_id)}
+
+
 @app.post("/api/jobs/{job_id}/resume_optimizer/approve")
 def approve_resume(job_id: int, body: dict):
     """Save the user-approved assembled resume struct and mark the task complete.