diff --git a/app/services/diagnose/_llm_client.py b/app/services/diagnose/_llm_client.py index ea9c4f6..87e695d 100644 --- a/app/services/diagnose/_llm_client.py +++ b/app/services/diagnose/_llm_client.py @@ -91,6 +91,7 @@ def call_llm( messages: list[dict], task_name: str = "log_analysis", timeout: float = 120.0, + max_tokens: int = 2048, ) -> str | None: """Send messages to the LLM; return raw text or None on failure. @@ -106,6 +107,8 @@ def call_llm( messages: OpenAI-style message list (system + user turns). task_name: cf-orch task name for product-routed inference (default: ``log_analysis``). timeout: Request timeout in seconds (default: 120). + max_tokens: Maximum tokens to generate (default: 2048). Prevents mid-sentence + truncation when the backend default is lower than the output needs. Returns: Raw text content string, or None if both paths fail. @@ -122,7 +125,7 @@ def call_llm( json={ "product": "turnstone", "task": task_name, - "payload": {"messages": messages, "stream": False}, + "payload": {"messages": messages, "stream": False, "max_tokens": max_tokens}, }, headers=headers, timeout=timeout, @@ -146,7 +149,7 @@ def call_llm( try: resp = httpx.post( f"{llm_url.rstrip('/')}/v1/chat/completions", - json={"model": llm_model, "messages": messages, "stream": False}, + json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": max_tokens}, headers=headers, timeout=timeout, ) diff --git a/app/services/diagnose/hypothesizer.py b/app/services/diagnose/hypothesizer.py index 433d9fb..328e733 100644 --- a/app/services/diagnose/hypothesizer.py +++ b/app/services/diagnose/hypothesizer.py @@ -116,6 +116,7 @@ class RootCauseHypothesizer: llm_model=llm_model, llm_api_key=llm_api_key, messages=messages, + max_tokens=1024, # JSON array of 2-4 hypotheses; 1024 is sufficient ) if raw_response is None: return [] diff --git a/app/services/llm.py b/app/services/llm.py index 6d6c219..0b04098 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -73,7 +73,7 @@ def summarize( json={ "product": "turnstone", "task": "log_analysis", - "payload": {"messages": messages, "stream": False}, + "payload": {"messages": messages, "stream": False, "max_tokens": 1024}, }, headers=headers, timeout=timeout, @@ -92,7 +92,7 @@ def summarize( try: resp = httpx.post( f"{llm_url.rstrip('/')}/v1/chat/completions", - json={"model": llm_model, "messages": messages, "stream": False}, + json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": 1024}, headers=headers, timeout=timeout, ) diff --git a/web/src/components/QuickCapture.vue b/web/src/components/QuickCapture.vue index 0d40398..240c975 100644 --- a/web/src/components/QuickCapture.vue +++ b/web/src/components/QuickCapture.vue @@ -85,10 +85,10 @@
-
- +
+ Diagnosis

{{ reasoning }}