diff --git a/config/llm.cloud.yaml b/config/llm.cloud.yaml
index a0173f6..64aa127 100644
--- a/config/llm.cloud.yaml
+++ b/config/llm.cloud.yaml
@@ -1,4 +1,14 @@
 backends:
+  cf_text:
+    api_key: any
+    base_url: http://host.docker.internal:8008/v1
+    enabled: true
+    model: cf-text
+    supports_images: false
+    type: openai_compat
+    cf_orch:
+      service: cf-text
+      ttl_s: 300
   anthropic:
     api_key_env: ANTHROPIC_API_KEY
     enabled: false
@@ -26,6 +36,9 @@ backends:
     model: llama3.1:8b  # generic — no personal fine-tunes in cloud
     supports_images: false
     type: openai_compat
+    cf_orch:
+      service: ollama
+      ttl_s: 300
   ollama_research:
     api_key: ollama
     base_url: http://host.docker.internal:11434/v1
@@ -33,6 +46,9 @@ backends:
     model: llama3.1:8b
     supports_images: false
     type: openai_compat
+    cf_orch:
+      service: ollama
+      ttl_s: 300
   vision_service:
     base_url: http://host.docker.internal:8002
     enabled: true
@@ -63,9 +79,11 @@ backends:
         - Qwen2.5-3B-Instruct
       ttl_s: 300
 fallback_order:
+- cf_text
 - vllm
 - ollama
 research_fallback_order:
+- cf_text
 - vllm_research
 - ollama_research
 vision_fallback_order:
diff --git a/config/llm.yaml b/config/llm.yaml
index 515bf24..95f4383 100644
--- a/config/llm.yaml
+++ b/config/llm.yaml
@@ -1,11 +1,14 @@
 backends:
   cf_text:
     api_key: any
-    base_url: http://host.docker.internal:8006/v1
+    base_url: http://host.docker.internal:8008/v1
     enabled: true
     model: cf-text
     supports_images: false
     type: openai_compat
+    cf_orch:
+      service: cf-text
+      ttl_s: 300
   anthropic:
     api_key_env: ANTHROPIC_API_KEY
     enabled: false
@@ -33,13 +36,19 @@ backends:
     model: llama3.2:3b
     supports_images: false
     type: openai_compat
+    cf_orch:
+      service: ollama
+      ttl_s: 300
   ollama_research:
     api_key: ollama
-    base_url: http://ollama_research:11434/v1
+    base_url: http://host.docker.internal:11435/v1
     enabled: true
     model: llama3.1:8b
     supports_images: false
     type: openai_compat
+    cf_orch:
+      service: ollama
+      ttl_s: 300
   vision_service:
     base_url: http://vision:8002
     enabled: true
@@ -64,6 +73,11 @@ backends:
     model: __auto__
     supports_images: false
     type: openai_compat
+    cf_orch:
+      service: vllm
+      model_candidates:
+      - Qwen2.5-3B-Instruct
+      ttl_s: 300
 fallback_order:
 - cf_text
 - ollama
@@ -72,10 +86,10 @@ fallback_order:
 - github_copilot
 - anthropic
 research_fallback_order:
-- claude_code
+- cf_text
 - vllm_research
 - ollama_research
-- cf_text
+- claude_code
 - github_copilot
 - anthropic
 vision_fallback_order:
diff --git a/dev-api.py b/dev-api.py
index 615d575..52d1003 100644
--- a/dev-api.py
+++ b/dev-api.py
@@ -80,7 +80,7 @@ _RL_COVER_LETTER = os.environ.get("LLM_RATE_COVER_LETTER", "20/hour")
 _RL_RESEARCH     = os.environ.get("LLM_RATE_RESEARCH", "10/hour")
 _RL_QA_SUGGEST   = os.environ.get("LLM_RATE_QA_SUGGEST", "60/hour")
 _RL_SURVEY       = os.environ.get("LLM_RATE_SURVEY", "30/hour")
-_RL_WIZARD       = os.environ.get("LLM_RATE_WIZARD", "60/hour")  # TODO(#122): wire to wizard/ai/interview after feat/77 merges
+_RL_WIZARD       = os.environ.get("LLM_RATE_WIZARD", "60/hour")  # rate limit applied to POST /api/wizard/ai/interview
 
 # Resolve GPU inference server URL.
 # Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → cloud default when licensed.
@@ -4654,7 +4654,8 @@ _WIZARD_ALLOWED_FIELDS: frozenset[str] = frozenset({
 
 
 @app.post("/api/wizard/ai/interview")
-def wizard_ai_interview(request: WizardInterviewRequest):
+@limiter.limit(_RL_WIZARD)  # NOTE(review): presumably slowapi — must stay below @app.post, and the handler needs its `request: Request` param; confirm
+def wizard_ai_interview(request: Request, body: WizardInterviewRequest):
     """Conduct one turn of the AI-guided profile interview. Tier-gated (BYOK-unlockable)."""
     from app.wizard.tiers import can_use, has_configured_llm
 
@@ -4664,7 +4665,7 @@ def wizard_ai_interview(request: WizardInterviewRequest):
 
     # Build conversation prompt from history
     conversation_lines = []
-    for msg in request.history:
+    for msg in body.history:
         role = msg.role
         content = msg.content.replace("\n", " ").replace("\r", "")
         if role == "user":
@@ -4675,10 +4676,10 @@ def wizard_ai_interview(request: WizardInterviewRequest):
     history_block = "\n".join(conversation_lines) if conversation_lines else "User: (starting conversation)"
 
     # Build profile summary to give LLM context about what's already known
-    if request.profile_so_far:
+    if body.profile_so_far:
         gathered = ", ".join(
             f"{k}={repr(v)}"
-            for k, v in request.profile_so_far.items()
+            for k, v in body.profile_so_far.items()
             if v not in (None, "", [], {})
         )
         profile_context = f"\n\n[Already gathered: {gathered}]" if gathered else ""