feat: try cf-orch task endpoint first; fall back to direct model call

POST /api/inference/task with product=turnstone and task=log_analysis routes
to the security reasoning model assigned in cf-orch. Falls back to the
OpenAI-compatible /v1/chat/completions path on a 404 (no assignment, or the
task endpoint is absent: local instances, example-node) or when the task
endpoint cannot be reached.
This commit is contained in:
pyr0ball 2026-05-13 08:20:29 -07:00
parent 729b78e40f
commit 812c934822

View file

@ -32,6 +32,14 @@ def _build_context(entries: list[SearchResult], max_entries: int = 25) -> str:
) )
def _extract_content(resp_json: dict) -> str | None:
"""Pull text content from an OpenAI-compat chat completion response."""
choices = resp_json.get("choices") or []
if not choices:
return None
return (choices[0].get("message", {}).get("content") or "").strip() or None
def summarize( def summarize(
query: str, query: str,
entries: list[SearchResult], entries: list[SearchResult],
@ -45,22 +53,44 @@ def summarize(
log_block = _build_context(entries) log_block = _build_context(entries)
prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block) prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
messages = [{"role": "user", "content": prompt}]
# Try cf-orch task-based endpoint first (routes to the security reasoning model
# assigned to turnstone.log_analysis without needing an explicit model name).
task_url = f"{llm_url.rstrip('/')}/api/inference/task"
try: try:
resp = httpx.post( resp = httpx.post(
f"{llm_url.rstrip('/')}/v1/chat/completions", task_url,
json={ json={
"model": llm_model, "product": "turnstone",
"messages": [{"role": "user", "content": prompt}], "task": "log_analysis",
"stream": False, "payload": {"messages": messages, "stream": False},
}, },
headers=headers, headers=headers,
timeout=timeout, timeout=timeout,
) )
if resp.status_code == 200:
return _extract_content(resp.json())
if resp.status_code != 404:
resp.raise_for_status()
# 404 means no assignment configured — fall through to direct model call
logger.debug("No task assignment for turnstone.log_analysis — falling back to direct model")
except httpx.HTTPStatusError:
raise
except Exception as exc:
logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
# Fallback: OpenAI-compat endpoint with explicit model name (local instances,
# example-node, or any cf-orch that doesn't have task assignments loaded).
try:
resp = httpx.post(
f"{llm_url.rstrip('/')}/v1/chat/completions",
json={"model": llm_model, "messages": messages, "stream": False},
headers=headers,
timeout=timeout,
)
resp.raise_for_status() resp.raise_for_status()
choices = resp.json().get("choices") or [] return _extract_content(resp.json())
if not choices:
return None
return (choices[0].get("message", {}).get("content") or "").strip() or None
except Exception as exc: except Exception as exc:
logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc) logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
return None return None