fix(diagnose): add max_tokens to all LLM calls; fix reasoning card contrast
Truncation fix: call_llm() in _llm_client.py now accepts max_tokens (default 2048) and passes it in both the cf-orch task payload and the OpenAI-compat fallback body. The hypothesizer passes max_tokens=1024 (JSON array output); the synthesizer uses 2048 (structured 5-section narrative); the legacy summarize path sets max_tokens to 1024 in both of its request bodies. Without an explicit limit, backends use their own default (often 512 tokens), causing mid-sentence truncation of the diagnosis output.
UI fix: the reasoning card changed from bg-accent/5 border-accent/30 (opacity modifiers on CSS variables don't compose reliably across themes) to the callout pattern: bg-surface-raised with a solid border-l-4 border-accent and rounded-r corners. The header label changed from text-text-dim font-medium to text-accent font-semibold for visual anchoring, and its decorative ⚡ icon is now marked aria-hidden. Body text remains text-text-primary for guaranteed contrast on both light and dark themes.
Tracks: #56 (technical-level post-processor, filed as a follow-on feature).
This commit is contained in:
parent
9196465946
commit
1c0a747c46
4 changed files with 11 additions and 7 deletions
|
|
@ -91,6 +91,7 @@ def call_llm(
|
||||||
messages: list[dict],
|
messages: list[dict],
|
||||||
task_name: str = "log_analysis",
|
task_name: str = "log_analysis",
|
||||||
timeout: float = 120.0,
|
timeout: float = 120.0,
|
||||||
|
max_tokens: int = 2048,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
"""Send messages to the LLM; return raw text or None on failure.
|
"""Send messages to the LLM; return raw text or None on failure.
|
||||||
|
|
||||||
|
|
@ -106,6 +107,8 @@ def call_llm(
|
||||||
messages: OpenAI-style message list (system + user turns).
|
messages: OpenAI-style message list (system + user turns).
|
||||||
task_name: cf-orch task name for product-routed inference (default: ``log_analysis``).
|
task_name: cf-orch task name for product-routed inference (default: ``log_analysis``).
|
||||||
timeout: Request timeout in seconds (default: 120).
|
timeout: Request timeout in seconds (default: 120).
|
||||||
|
max_tokens: Maximum tokens to generate (default: 2048). Prevents mid-sentence
|
||||||
|
truncation when the backend default is lower than the output needs.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Raw text content string, or None if both paths fail.
|
Raw text content string, or None if both paths fail.
|
||||||
|
|
@ -122,7 +125,7 @@ def call_llm(
|
||||||
json={
|
json={
|
||||||
"product": "turnstone",
|
"product": "turnstone",
|
||||||
"task": task_name,
|
"task": task_name,
|
||||||
"payload": {"messages": messages, "stream": False},
|
"payload": {"messages": messages, "stream": False, "max_tokens": max_tokens},
|
||||||
},
|
},
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
|
|
@ -146,7 +149,7 @@ def call_llm(
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
||||||
json={"model": llm_model, "messages": messages, "stream": False},
|
json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": max_tokens},
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,7 @@ class RootCauseHypothesizer:
|
||||||
llm_model=llm_model,
|
llm_model=llm_model,
|
||||||
llm_api_key=llm_api_key,
|
llm_api_key=llm_api_key,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
max_tokens=1024, # JSON array of 2-4 hypotheses; 1024 is sufficient
|
||||||
)
|
)
|
||||||
if raw_response is None:
|
if raw_response is None:
|
||||||
return []
|
return []
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ def summarize(
|
||||||
json={
|
json={
|
||||||
"product": "turnstone",
|
"product": "turnstone",
|
||||||
"task": "log_analysis",
|
"task": "log_analysis",
|
||||||
"payload": {"messages": messages, "stream": False},
|
"payload": {"messages": messages, "stream": False, "max_tokens": 1024},
|
||||||
},
|
},
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
|
|
@ -92,7 +92,7 @@ def summarize(
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
||||||
json={"model": llm_model, "messages": messages, "stream": False},
|
json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": 1024},
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -85,10 +85,10 @@
|
||||||
<!-- LLM reasoning card -->
|
<!-- LLM reasoning card -->
|
||||||
<div
|
<div
|
||||||
v-if="reasoning"
|
v-if="reasoning"
|
||||||
class="mb-4 rounded border border-accent/30 bg-accent/5 p-4"
|
class="mb-4 rounded-r border-l-4 border-accent bg-surface-raised p-4"
|
||||||
>
|
>
|
||||||
<div class="flex items-center gap-2 mb-2 text-xs text-text-dim font-medium uppercase tracking-wide">
|
<div class="flex items-center gap-2 mb-2 text-xs text-accent font-semibold uppercase tracking-wide">
|
||||||
<span>⚡</span>
|
<span aria-hidden="true">⚡</span>
|
||||||
<span>Diagnosis</span>
|
<span>Diagnosis</span>
|
||||||
</div>
|
</div>
|
||||||
<p class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap">{{ reasoning }}</p>
|
<p class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap">{{ reasoning }}</p>
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue