From 812c934822c71399f5322268f2959e2f5fca9d31 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Wed, 13 May 2026 08:20:29 -0700
Subject: [PATCH] feat: try cf-orch task endpoint first; fall back to direct
 model call

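POST /api/inference/task with product=turnstone task=log_analysis routes to
the security reasoning model assigned in cf-orch. Falls back to the OpenAI-
compat /v1/chat/completions path on 404 (no assignment), if the task
endpoint is absent (local instances, example-node), or if the request fails
before a status is returned (connection error, timeout). Any other HTTP
error status from the task endpoint is re-raised instead of triggering the
fallback.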
---
 app/services/llm.py | 46 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/app/services/llm.py b/app/services/llm.py
index fc92c70..cca9a00 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -32,6 +32,14 @@ def _build_context(entries: list[SearchResult], max_entries: int = 25) -> str:
     )
 
 
+def _extract_content(resp_json: dict) -> str | None:
+    """Return the first choice's stripped message text, or None if it is missing or blank."""
+    choices = resp_json.get("choices") or []
+    # `or {}` (not a .get default) so an explicit `"message": null` is treated like a missing one.
+    message = (choices[0].get("message") or {}) if choices else {}
+    return (message.get("content") or "").strip() or None
+
+
 def summarize(
     query: str,
     entries: list[SearchResult],
@@ -45,22 +53,44 @@ def summarize(
     log_block = _build_context(entries)
     prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
     headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+    messages = [{"role": "user", "content": prompt}]
+
+    # Try cf-orch task-based endpoint first (routes to the security reasoning model
+    # assigned to turnstone.log_analysis without needing an explicit model name).
+    task_url = f"{llm_url.rstrip('/')}/api/inference/task"
     try:
         resp = httpx.post(
-            f"{llm_url.rstrip('/')}/v1/chat/completions",
+            task_url,
             json={
-                "model": llm_model,
-                "messages": [{"role": "user", "content": prompt}],
-                "stream": False,
+                "product": "turnstone",
+                "task": "log_analysis",
+                "payload": {"messages": messages, "stream": False},
             },
             headers=headers,
             timeout=timeout,
         )
+        if resp.status_code == 200:
+            return _extract_content(resp.json())
+        if resp.status_code != 404:
+            resp.raise_for_status()
+        # 404 means no assignment configured — fall through to direct model call
+        logger.debug("No task assignment for turnstone.log_analysis — falling back to direct model")
+    except httpx.HTTPStatusError:
+        raise
+    except Exception as exc:
+        logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
+
+    # Fallback: OpenAI-compat endpoint with explicit model name (local instances,
+    # example-node, or any cf-orch that doesn't have task assignments loaded).
+    try:
+        resp = httpx.post(
+            f"{llm_url.rstrip('/')}/v1/chat/completions",
+            json={"model": llm_model, "messages": messages, "stream": False},
+            headers=headers,
+            timeout=timeout,
+        )
         resp.raise_for_status()
-        choices = resp.json().get("choices") or []
-        if not choices:
-            return None
-        return (choices[0].get("message", {}).get("content") or "").strip() or None
+        return _extract_content(resp.json())
     except Exception as exc:
         logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
         return None