fix(diagnose): add max_tokens to all LLM calls; fix reasoning card contrast

Truncation fix: call_llm() in _llm_client.py now accepts max_tokens (default 2048) and passes it in both the cf-orch task payload and the OpenAI-compat fallback body. Hypothesizer uses max_tokens=1024 (JSON array output); synthesizer uses 2048 (structured 5-section narrative). Legacy summarize() in app/services/llm.py passes max_tokens=1024 on both the cf-orch and OpenAI-compat paths in this commit (NOTE(review): raise to 2048 if it is expected to emit the same 5-section narrative). Without an explicit limit, backends use their own default (often 512 tokens), causing mid-sentence truncation of the diagnosis output. UI fix: reasoning card changed from bg-accent/5 border-accent/30 (opacity modifiers on CSS variables don't compose reliably across themes) to the callout pattern: bg-surface-raised with a solid border-l-4 border-accent. Header label changed from text-text-dim font-medium to text-accent font-semibold for visual anchoring, and the decorative ⚡ icon is now marked aria-hidden. Text remains text-text-primary for guaranteed contrast on both light and dark themes. Tracks: #56 (technical-level post-processor, filed as follow-on feature)
2026-05-27 22:23:36 -07:00 · 2026-05-27 22:23:36 -07:00 · 1c0a747c46
commit 1c0a747c46
parent 9196465946
4 changed files with 11 additions and 7 deletions
--- a/app/services/diagnose/_llm_client.py
+++ b/app/services/diagnose/_llm_client.py
@@ -91,6 +91,7 @@ def call_llm(
    messages: list[dict],
    task_name: str = "log_analysis",
    timeout: float = 120.0,
+    max_tokens: int = 2048,
 ) -> str | None:
    """Send messages to the LLM; return raw text or None on failure.
@@ -106,6 +107,8 @@ def call_llm(
        messages:    OpenAI-style message list (system + user turns).
        task_name:   cf-orch task name for product-routed inference (default: ``log_analysis``).
        timeout:     Request timeout in seconds (default: 120).
+        max_tokens:  Maximum tokens to generate (default: 2048). Prevents mid-sentence
+                     truncation when the backend default is lower than the output needs.
    Returns:
        Raw text content string, or None if both paths fail.
@@ -122,7 +125,7 @@ def call_llm(
            json={
                "product": "turnstone",
                "task": task_name,
-                "payload": {"messages": messages, "stream": False},
+                "payload": {"messages": messages, "stream": False, "max_tokens": max_tokens},
            },
            headers=headers,
            timeout=timeout,
@@ -146,7 +149,7 @@ def call_llm(
    try:
        resp = httpx.post(
            f"{llm_url.rstrip('/')}/v1/chat/completions",
-            json={"model": llm_model, "messages": messages, "stream": False},
+            json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": max_tokens},
            headers=headers,
            timeout=timeout,
        )
--- a/app/services/diagnose/hypothesizer.py
+++ b/app/services/diagnose/hypothesizer.py
@@ -116,6 +116,7 @@ class RootCauseHypothesizer:
            llm_model=llm_model,
            llm_api_key=llm_api_key,
            messages=messages,
+            max_tokens=1024,  # JSON array of 2-4 hypotheses; 1024 is sufficient
        )
        if raw_response is None:
            return []
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -73,7 +73,7 @@ def summarize(
            json={
                "product": "turnstone",
                "task": "log_analysis",
-                "payload": {"messages": messages, "stream": False},
+                "payload": {"messages": messages, "stream": False, "max_tokens": 1024},
            },
            headers=headers,
            timeout=timeout,
@@ -92,7 +92,7 @@ def summarize(
    try:
        resp = httpx.post(
            f"{llm_url.rstrip('/')}/v1/chat/completions",
-            json={"model": llm_model, "messages": messages, "stream": False},
+            json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": 1024},
            headers=headers,
            timeout=timeout,
        )
--- a/web/src/components/QuickCapture.vue
+++ b/web/src/components/QuickCapture.vue
@@ -85,10 +85,10 @@
    <!-- LLM reasoning card -->
    <div
      v-if="reasoning"
-      class="mb-4 rounded border border-accent/30 bg-accent/5 p-4"
+      class="mb-4 rounded-r border-l-4 border-accent bg-surface-raised p-4"
    >
-      <div class="flex items-center gap-2 mb-2 text-xs text-text-dim font-medium uppercase tracking-wide">
+      <div class="flex items-center gap-2 mb-2 text-xs text-accent font-semibold uppercase tracking-wide">
-        <span>⚡</span>
+        <span aria-hidden="true">⚡</span>
        <span>Diagnosis</span>
      </div>
      <p class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap">{{ reasoning }}</p>