From 812c934822c71399f5322268f2959e2f5fca9d31 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Wed, 13 May 2026 08:20:29 -0700
Subject: [PATCH] feat: try cf-orch task endpoint first; fall back to direct model call

POST /api/inference/task with product=turnstone task=log_analysis routes to the
security reasoning model assigned in cf-orch. Falls back to the OpenAI-
compat /v1/chat/completions path on 404 (no assignment) or if the task endpoint
is absent (local instances, example-node).
---
 app/services/llm.py | 46 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/app/services/llm.py b/app/services/llm.py
index fc92c70..cca9a00 100644
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -32,6 +32,14 @@ def _build_context(entries: list[SearchResult], max_entries: int = 25) -> str:
     )
 
 
+def _extract_content(resp_json: dict) -> str | None:
+    """Pull text content from an OpenAI-compat chat completion response."""
+    choices = resp_json.get("choices") or []
+    if not choices:
+        return None
+    return (choices[0].get("message", {}).get("content") or "").strip() or None
+
+
 def summarize(
     query: str,
     entries: list[SearchResult],
@@ -45,22 +53,44 @@ def summarize(
     log_block = _build_context(entries)
     prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
     headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+    messages = [{"role": "user", "content": prompt}]
+
+    # Try cf-orch task-based endpoint first (routes to the security reasoning model
+    # assigned to turnstone.log_analysis without needing an explicit model name).
+    task_url = f"{llm_url.rstrip('/')}/api/inference/task"
     try:
         resp = httpx.post(
-            f"{llm_url.rstrip('/')}/v1/chat/completions",
+            task_url,
             json={
-                "model": llm_model,
-                "messages": [{"role": "user", "content": prompt}],
-                "stream": False,
+                "product": "turnstone",
+                "task": "log_analysis",
+                "payload": {"messages": messages, "stream": False},
             },
             headers=headers,
             timeout=timeout,
         )
+        if resp.status_code == 200:
+            return _extract_content(resp.json())
+        if resp.status_code != 404:
+            resp.raise_for_status()
+        # 404 means no assignment configured — fall through to direct model call
+        logger.debug("No task assignment for turnstone.log_analysis — falling back to direct model")
+    except httpx.HTTPStatusError:
+        raise
+    except Exception as exc:
+        logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
+
+    # Fallback: OpenAI-compat endpoint with explicit model name (local instances,
+    # example-node, or any cf-orch that doesn't have task assignments loaded).
+    try:
+        resp = httpx.post(
+            f"{llm_url.rstrip('/')}/v1/chat/completions",
+            json={"model": llm_model, "messages": messages, "stream": False},
+            headers=headers,
+            timeout=timeout,
+        )
         resp.raise_for_status()
-        choices = resp.json().get("choices") or []
-        if not choices:
-            return None
-        return (choices[0].get("message", {}).get("content") or "").strip() or None
+        return _extract_content(resp.json())
     except Exception as exc:
         logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
         return None