From 1c0a747c466c17f5e518da25ec5c274486b122c5 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 27 May 2026 22:23:36 -0700 Subject: [PATCH] fix(diagnose): add max_tokens to all LLM calls; fix reasoning card contrast Truncation fix: call_llm() in _llm_client.py now accepts max_tokens (default 2048) and passes it in both the cf-orch task payload and the OpenAI-compat fallback body. Hypothesizer uses max_tokens=1024 (JSON array output); the synthesizer uses 2048 (structured 5-section narrative); legacy summarize() in app/services/llm.py uses 1024. Without this, backends use their own default (often 512 tokens), causing mid-sentence truncation of the diagnosis output. UI fix: reasoning card changed from bg-accent/5 border-accent/30 (opacity modifiers on CSS variables don't compose reliably across themes) to the callout pattern: bg-surface-raised with a solid border-l-4 border-accent. Header label changed from text-text-dim to text-accent for visual anchoring. Text remains text-text-primary for guaranteed contrast on both light and dark themes. Tracks: #56 (technical-level post-processor, filed as follow-on feature) --- app/services/diagnose/_llm_client.py | 7 +++++-- app/services/diagnose/hypothesizer.py | 1 + app/services/llm.py | 4 ++-- web/src/components/QuickCapture.vue | 6 +++--- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/app/services/diagnose/_llm_client.py b/app/services/diagnose/_llm_client.py index 7a93196..6c4a507 100644 --- a/app/services/diagnose/_llm_client.py +++ b/app/services/diagnose/_llm_client.py @@ -91,6 +91,7 @@ def call_llm( messages: list[dict], task_name: str = "log_analysis", timeout: float = 120.0, + max_tokens: int = 2048, ) -> str | None: """Send messages to the LLM; return raw text or None on failure. @@ -106,6 +107,8 @@ def call_llm( messages: OpenAI-style message list (system + user turns). task_name: cf-orch task name for product-routed inference (default: ``log_analysis``). timeout: Request timeout in seconds (default: 120). 
+ max_tokens: Maximum tokens to generate (default: 2048). Prevents mid-sentence + truncation when the backend default is lower than the output needs. Returns: Raw text content string, or None if both paths fail. @@ -122,7 +125,7 @@ def call_llm( json={ "product": "turnstone", "task": task_name, - "payload": {"messages": messages, "stream": False}, + "payload": {"messages": messages, "stream": False, "max_tokens": max_tokens}, }, headers=headers, timeout=timeout, @@ -146,7 +149,7 @@ def call_llm( try: resp = httpx.post( f"{llm_url.rstrip('/')}/v1/chat/completions", - json={"model": llm_model, "messages": messages, "stream": False}, + json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": max_tokens}, headers=headers, timeout=timeout, ) diff --git a/app/services/diagnose/hypothesizer.py b/app/services/diagnose/hypothesizer.py index 433d9fb..328e733 100644 --- a/app/services/diagnose/hypothesizer.py +++ b/app/services/diagnose/hypothesizer.py @@ -116,6 +116,7 @@ class RootCauseHypothesizer: llm_model=llm_model, llm_api_key=llm_api_key, messages=messages, + max_tokens=1024, # JSON array of 2-4 hypotheses; 1024 is sufficient ) if raw_response is None: return [] diff --git a/app/services/llm.py b/app/services/llm.py index 9a33e10..6e26a16 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -73,7 +73,7 @@ def summarize( json={ "product": "turnstone", "task": "log_analysis", - "payload": {"messages": messages, "stream": False}, + "payload": {"messages": messages, "stream": False, "max_tokens": 1024}, }, headers=headers, timeout=timeout, @@ -92,7 +92,7 @@ def summarize( try: resp = httpx.post( f"{llm_url.rstrip('/')}/v1/chat/completions", - json={"model": llm_model, "messages": messages, "stream": False}, + json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": 1024}, headers=headers, timeout=timeout, ) diff --git a/web/src/components/QuickCapture.vue b/web/src/components/QuickCapture.vue index 0d40398..240c975 
100644 --- a/web/src/components/QuickCapture.vue +++ b/web/src/components/QuickCapture.vue @@ -85,10 +85,10 @@
-
- +
+ Diagnosis

{{ reasoning }}