diff --git a/app/rest.py b/app/rest.py index 8f6ea2e..843e0fd 100644 --- a/app/rest.py +++ b/app/rest.py @@ -79,6 +79,7 @@ _PREFS_DEFAULTS: dict = { "entry_point_style": "topbar", "llm_url": "http://localhost:11434", "llm_model": "llama3.1:8b", + "llm_api_key": "", "severity_overrides": [ { "name": "PAM auth noise", @@ -121,6 +122,7 @@ class SettingsBody(BaseModel): entry_point_style: str | None = None llm_url: str | None = None llm_model: str | None = None + llm_api_key: str | None = None severity_overrides: list[SeverityOverride] | None = None @@ -249,6 +251,7 @@ def diagnose_post(body: DiagnoseRequest) -> dict: until=body.until, llm_url=prefs.get("llm_url") or None, llm_model=prefs.get("llm_model") or None, + llm_api_key=prefs.get("llm_api_key") or None, ) return { "summary": result["summary"], @@ -273,6 +276,8 @@ def patch_settings(body: SettingsBody) -> dict: prefs["llm_url"] = body.llm_url if body.llm_model is not None: prefs["llm_model"] = body.llm_model + if body.llm_api_key is not None: + prefs["llm_api_key"] = body.llm_api_key if body.severity_overrides is not None: prefs["severity_overrides"] = [o.model_dump() for o in body.severity_overrides] _save_prefs(prefs) diff --git a/app/services/diagnose.py b/app/services/diagnose.py index 516b7d8..e82b0fe 100644 --- a/app/services/diagnose.py +++ b/app/services/diagnose.py @@ -51,6 +51,7 @@ def diagnose( until: str | None = None, llm_url: str | None = None, llm_model: str | None = None, + llm_api_key: str | None = None, ) -> dict[str, Any]: """Run layered log search with NL time extraction. Returns summary + entries.""" time_detected = since is not None and until is not None @@ -84,7 +85,7 @@ def diagnose( reasoning: str | None = None if llm_url and llm_model: - reasoning = summarize(query, combined, llm_url=llm_url, llm_model=llm_model) + reasoning = summarize(query, combined, llm_url=llm_url, llm_model=llm_model, api_key=llm_api_key) return { "summary": { diff --git a/app/services/llm.py b/app/services/llm.py index 6bfa542..4fd13c9 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -37,20 +37,30 @@ def summarize( entries: list[SearchResult], llm_url: str, llm_model: str, + api_key: str | None = None, timeout: float = 20.0, ) -> str | None: if not entries: return None log_block = _build_context(entries) prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block) + headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} try: resp = httpx.post( - f"{llm_url.rstrip('/')}/api/generate", - json={"model": llm_model, "prompt": prompt, "stream": False}, + f"{llm_url.rstrip('/')}/v1/chat/completions", + json={ + "model": llm_model, + "messages": [{"role": "user", "content": prompt}], + "stream": False, + }, + headers=headers, timeout=timeout, ) resp.raise_for_status() - return resp.json().get("response", "").strip() or None + choices = resp.json().get("choices") or [] + if not choices: + return None + return (choices[0].get("message", {}).get("content") or "").strip() or None except Exception as exc: logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc) return None diff --git a/tests/test_services_llm.py b/tests/test_services_llm.py index 447091f..3fb6eca 100644 --- a/tests/test_services_llm.py +++ b/tests/test_services_llm.py @@ -39,7 +39,16 @@ def test_summarize_returns_none_on_http_error(): def test_summarize_returns_none_on_empty_response(): mock_resp = MagicMock() mock_resp.raise_for_status.return_value = None - mock_resp.json.return_value = {"response": ""} + mock_resp.json.return_value = {"choices": [{"message": {"content": ""}}]} + with patch("app.services.llm.httpx.post", return_value=mock_resp): + result = summarize("query", [_entry("x")], "http://host", "llama3") + assert result is None + + +def test_summarize_returns_none_on_missing_choices(): + mock_resp = MagicMock() + mock_resp.raise_for_status.return_value = None + mock_resp.json.return_value = {"choices": []} with patch("app.services.llm.httpx.post", return_value=mock_resp): result = summarize("query", [_entry("x")], "http://host", "llama3") assert result is None @@ -48,12 +57,22 @@ def test_summarize_returns_none_on_empty_response(): def test_summarize_returns_text_on_success(): mock_resp = MagicMock() mock_resp.raise_for_status.return_value = None - mock_resp.json.return_value = {"response": "Ollama exited with code 1."} + mock_resp.json.return_value = {"choices": [{"message": {"content": "Ollama exited with code 1."}}]} with patch("app.services.llm.httpx.post", return_value=mock_resp): result = summarize("ollama crashed", [_entry("Failed")], "http://host", "llama3") assert result == "Ollama exited with code 1." +def test_summarize_sends_bearer_token(): + mock_resp = MagicMock() + mock_resp.raise_for_status.return_value = None + mock_resp.json.return_value = {"choices": [{"message": {"content": "disk full"}}]} + with patch("app.services.llm.httpx.post", return_value=mock_resp) as mock_post: + summarize("disk error", [_entry("ENOSPC")], "http://host", "llama3", api_key="test-key") + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["headers"] == {"Authorization": "Bearer test-key"} + + def test_build_context_sorts_errors_first(): entries = [ _entry("info message", severity="INFO"), diff --git a/web/src/views/SettingsView.vue b/web/src/views/SettingsView.vue index dd8a268..85f11ea 100644 --- a/web/src/views/SettingsView.vue +++ b/web/src/views/SettingsView.vue @@ -36,11 +36,13 @@

LLM Reasoning

- Ollama endpoint used to generate plain-language diagnoses. Leave blank to disable. + LLM endpoint for plain-language diagnoses. Works with local Ollama or a remote + cf-orch coordinator (e.g. https://orch.circuitforge.tech). + Leave blank to disable.

- +
+
+ + +