feat: try cf-orch task endpoint first; fall back to direct model call
POST /api/inference/task with product=turnstone and task=log_analysis routes to the security reasoning model assigned in cf-orch. Falls back to the OpenAI-compat /v1/chat/completions path on 404 (no assignment) or if the task endpoint is absent (local instances, example-node).
This commit is contained in:
parent
729b78e40f
commit
812c934822
1 changed files with 38 additions and 8 deletions
|
|
@ -32,6 +32,14 @@ def _build_context(entries: list[SearchResult], max_entries: int = 25) -> str:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_content(resp_json: dict) -> str | None:
|
||||||
|
"""Pull text content from an OpenAI-compat chat completion response."""
|
||||||
|
choices = resp_json.get("choices") or []
|
||||||
|
if not choices:
|
||||||
|
return None
|
||||||
|
return (choices[0].get("message", {}).get("content") or "").strip() or None
|
||||||
|
|
||||||
|
|
||||||
def summarize(
|
def summarize(
|
||||||
query: str,
|
query: str,
|
||||||
entries: list[SearchResult],
|
entries: list[SearchResult],
|
||||||
|
|
@ -45,22 +53,44 @@ def summarize(
|
||||||
log_block = _build_context(entries)
|
log_block = _build_context(entries)
|
||||||
prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
|
prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
|
||||||
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||||
|
messages = [{"role": "user", "content": prompt}]
|
||||||
|
|
||||||
|
# Try cf-orch task-based endpoint first (routes to the security reasoning model
|
||||||
|
# assigned to turnstone.log_analysis without needing an explicit model name).
|
||||||
|
task_url = f"{llm_url.rstrip('/')}/api/inference/task"
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
task_url,
|
||||||
json={
|
json={
|
||||||
"model": llm_model,
|
"product": "turnstone",
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"task": "log_analysis",
|
||||||
"stream": False,
|
"payload": {"messages": messages, "stream": False},
|
||||||
},
|
},
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
|
if resp.status_code == 200:
|
||||||
|
return _extract_content(resp.json())
|
||||||
|
if resp.status_code != 404:
|
||||||
|
resp.raise_for_status()
|
||||||
|
# 404 means no assignment configured — fall through to direct model call
|
||||||
|
logger.debug("No task assignment for turnstone.log_analysis — falling back to direct model")
|
||||||
|
except httpx.HTTPStatusError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
|
||||||
|
|
||||||
|
# Fallback: OpenAI-compat endpoint with explicit model name (local instances,
|
||||||
|
# example-node, or any cf-orch that doesn't have task assignments loaded).
|
||||||
|
try:
|
||||||
|
resp = httpx.post(
|
||||||
|
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
||||||
|
json={"model": llm_model, "messages": messages, "stream": False},
|
||||||
|
headers=headers,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
choices = resp.json().get("choices") or []
|
return _extract_content(resp.json())
|
||||||
if not choices:
|
|
||||||
return None
|
|
||||||
return (choices[0].get("message", {}).get("content") or "").strip() or None
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
|
logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue