feat: switch LLM backend to OpenAI-compat; add cf-orch remote inference support
Turnstone now calls /v1/chat/completions instead of Ollama's /api/generate. This format works with both local Ollama (>=0.1.24) and a remote cf-orch coordinator, enabling GPU-less nodes like Xander's to route diagnoses through the cluster without any local model.

- llm.py: OpenAI-compatible messages format, optional Bearer auth header
- diagnose.py: thread llm_api_key through the call chain
- rest.py: llm_api_key pref (default empty), SettingsBody field, passed to diagnose
- SettingsView.vue: API Key field, label updated from "Ollama URL" to "LLM Endpoint URL"
- tests: updated mocks for new response shape; added bearer token assertion test
This commit is contained in:
parent
4f93c30c01
commit
cae9cd7eee
5 changed files with 60 additions and 10 deletions
|
|
@ -79,6 +79,7 @@ _PREFS_DEFAULTS: dict = {
|
||||||
"entry_point_style": "topbar",
|
"entry_point_style": "topbar",
|
||||||
"llm_url": "http://localhost:11434",
|
"llm_url": "http://localhost:11434",
|
||||||
"llm_model": "llama3.1:8b",
|
"llm_model": "llama3.1:8b",
|
||||||
|
"llm_api_key": "",
|
||||||
"severity_overrides": [
|
"severity_overrides": [
|
||||||
{
|
{
|
||||||
"name": "PAM auth noise",
|
"name": "PAM auth noise",
|
||||||
|
|
@ -121,6 +122,7 @@ class SettingsBody(BaseModel):
|
||||||
entry_point_style: str | None = None
|
entry_point_style: str | None = None
|
||||||
llm_url: str | None = None
|
llm_url: str | None = None
|
||||||
llm_model: str | None = None
|
llm_model: str | None = None
|
||||||
|
llm_api_key: str | None = None
|
||||||
severity_overrides: list[SeverityOverride] | None = None
|
severity_overrides: list[SeverityOverride] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -249,6 +251,7 @@ def diagnose_post(body: DiagnoseRequest) -> dict:
|
||||||
until=body.until,
|
until=body.until,
|
||||||
llm_url=prefs.get("llm_url") or None,
|
llm_url=prefs.get("llm_url") or None,
|
||||||
llm_model=prefs.get("llm_model") or None,
|
llm_model=prefs.get("llm_model") or None,
|
||||||
|
llm_api_key=prefs.get("llm_api_key") or None,
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"summary": result["summary"],
|
"summary": result["summary"],
|
||||||
|
|
@ -273,6 +276,8 @@ def patch_settings(body: SettingsBody) -> dict:
|
||||||
prefs["llm_url"] = body.llm_url
|
prefs["llm_url"] = body.llm_url
|
||||||
if body.llm_model is not None:
|
if body.llm_model is not None:
|
||||||
prefs["llm_model"] = body.llm_model
|
prefs["llm_model"] = body.llm_model
|
||||||
|
if body.llm_api_key is not None:
|
||||||
|
prefs["llm_api_key"] = body.llm_api_key
|
||||||
if body.severity_overrides is not None:
|
if body.severity_overrides is not None:
|
||||||
prefs["severity_overrides"] = [o.model_dump() for o in body.severity_overrides]
|
prefs["severity_overrides"] = [o.model_dump() for o in body.severity_overrides]
|
||||||
_save_prefs(prefs)
|
_save_prefs(prefs)
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@ def diagnose(
|
||||||
until: str | None = None,
|
until: str | None = None,
|
||||||
llm_url: str | None = None,
|
llm_url: str | None = None,
|
||||||
llm_model: str | None = None,
|
llm_model: str | None = None,
|
||||||
|
llm_api_key: str | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Run layered log search with NL time extraction. Returns summary + entries."""
|
"""Run layered log search with NL time extraction. Returns summary + entries."""
|
||||||
time_detected = since is not None and until is not None
|
time_detected = since is not None and until is not None
|
||||||
|
|
@ -84,7 +85,7 @@ def diagnose(
|
||||||
|
|
||||||
reasoning: str | None = None
|
reasoning: str | None = None
|
||||||
if llm_url and llm_model:
|
if llm_url and llm_model:
|
||||||
reasoning = summarize(query, combined, llm_url=llm_url, llm_model=llm_model)
|
reasoning = summarize(query, combined, llm_url=llm_url, llm_model=llm_model, api_key=llm_api_key)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"summary": {
|
"summary": {
|
||||||
|
|
|
||||||
|
|
@ -37,20 +37,30 @@ def summarize(
|
||||||
entries: list[SearchResult],
|
entries: list[SearchResult],
|
||||||
llm_url: str,
|
llm_url: str,
|
||||||
llm_model: str,
|
llm_model: str,
|
||||||
|
api_key: str | None = None,
|
||||||
timeout: float = 20.0,
|
timeout: float = 20.0,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
if not entries:
|
if not entries:
|
||||||
return None
|
return None
|
||||||
log_block = _build_context(entries)
|
log_block = _build_context(entries)
|
||||||
prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
|
prompt = _PROMPT_TEMPLATE.format(query=query, n=min(len(entries), 25), log_block=log_block)
|
||||||
|
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{llm_url.rstrip('/')}/api/generate",
|
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
||||||
json={"model": llm_model, "prompt": prompt, "stream": False},
|
json={
|
||||||
|
"model": llm_model,
|
||||||
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"stream": False,
|
||||||
|
},
|
||||||
|
headers=headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp.json().get("response", "").strip() or None
|
choices = resp.json().get("choices") or []
|
||||||
|
if not choices:
|
||||||
|
return None
|
||||||
|
return (choices[0].get("message", {}).get("content") or "").strip() or None
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
|
logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,16 @@ def test_summarize_returns_none_on_http_error():
|
||||||
def test_summarize_returns_none_on_empty_response():
|
def test_summarize_returns_none_on_empty_response():
|
||||||
mock_resp = MagicMock()
|
mock_resp = MagicMock()
|
||||||
mock_resp.raise_for_status.return_value = None
|
mock_resp.raise_for_status.return_value = None
|
||||||
mock_resp.json.return_value = {"response": ""}
|
mock_resp.json.return_value = {"choices": [{"message": {"content": ""}}]}
|
||||||
|
with patch("app.services.llm.httpx.post", return_value=mock_resp):
|
||||||
|
result = summarize("query", [_entry("x")], "http://host", "llama3")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_summarize_returns_none_on_missing_choices():
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.raise_for_status.return_value = None
|
||||||
|
mock_resp.json.return_value = {"choices": []}
|
||||||
with patch("app.services.llm.httpx.post", return_value=mock_resp):
|
with patch("app.services.llm.httpx.post", return_value=mock_resp):
|
||||||
result = summarize("query", [_entry("x")], "http://host", "llama3")
|
result = summarize("query", [_entry("x")], "http://host", "llama3")
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
@ -48,12 +57,22 @@ def test_summarize_returns_none_on_empty_response():
|
||||||
def test_summarize_returns_text_on_success():
|
def test_summarize_returns_text_on_success():
|
||||||
mock_resp = MagicMock()
|
mock_resp = MagicMock()
|
||||||
mock_resp.raise_for_status.return_value = None
|
mock_resp.raise_for_status.return_value = None
|
||||||
mock_resp.json.return_value = {"response": "Ollama exited with code 1."}
|
mock_resp.json.return_value = {"choices": [{"message": {"content": "Ollama exited with code 1."}}]}
|
||||||
with patch("app.services.llm.httpx.post", return_value=mock_resp):
|
with patch("app.services.llm.httpx.post", return_value=mock_resp):
|
||||||
result = summarize("ollama crashed", [_entry("Failed")], "http://host", "llama3")
|
result = summarize("ollama crashed", [_entry("Failed")], "http://host", "llama3")
|
||||||
assert result == "Ollama exited with code 1."
|
assert result == "Ollama exited with code 1."
|
||||||
|
|
||||||
|
|
||||||
|
def test_summarize_sends_bearer_token():
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.raise_for_status.return_value = None
|
||||||
|
mock_resp.json.return_value = {"choices": [{"message": {"content": "disk full"}}]}
|
||||||
|
with patch("app.services.llm.httpx.post", return_value=mock_resp) as mock_post:
|
||||||
|
summarize("disk error", [_entry("ENOSPC")], "http://host", "llama3", api_key="test-key")
|
||||||
|
call_kwargs = mock_post.call_args
|
||||||
|
assert call_kwargs.kwargs["headers"] == {"Authorization": "Bearer test-key"}
|
||||||
|
|
||||||
|
|
||||||
def test_build_context_sorts_errors_first():
|
def test_build_context_sorts_errors_first():
|
||||||
entries = [
|
entries = [
|
||||||
_entry("info message", severity="INFO"),
|
_entry("info message", severity="INFO"),
|
||||||
|
|
|
||||||
|
|
@ -36,11 +36,13 @@
|
||||||
<div>
|
<div>
|
||||||
<h2 class="text-text-primary text-sm font-semibold mb-1">LLM Reasoning</h2>
|
<h2 class="text-text-primary text-sm font-semibold mb-1">LLM Reasoning</h2>
|
||||||
<p class="text-text-dim text-xs mb-3">
|
<p class="text-text-dim text-xs mb-3">
|
||||||
Ollama endpoint used to generate plain-language diagnoses. Leave blank to disable.
|
LLM endpoint for plain-language diagnoses. Works with local Ollama or a remote
|
||||||
|
cf-orch coordinator (e.g. <span class="font-mono">https://orch.circuitforge.tech</span>).
|
||||||
|
Leave blank to disable.
|
||||||
</p>
|
</p>
|
||||||
<div class="space-y-3">
|
<div class="space-y-3">
|
||||||
<div>
|
<div>
|
||||||
<label class="block text-xs text-text-dim mb-1">Ollama URL</label>
|
<label class="block text-xs text-text-dim mb-1">LLM Endpoint URL</label>
|
||||||
<input
|
<input
|
||||||
v-model="prefs.llm_url"
|
v-model="prefs.llm_url"
|
||||||
type="text"
|
type="text"
|
||||||
|
|
@ -57,6 +59,18 @@
|
||||||
class="w-full bg-surface border border-surface-border rounded px-3 py-2 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent transition-colors"
|
class="w-full bg-surface border border-surface-border rounded px-3 py-2 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent transition-colors"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">
|
||||||
|
API Key
|
||||||
|
<span class="text-text-dim font-normal">(optional — required for cf-orch remote inference)</span>
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
v-model="prefs.llm_api_key"
|
||||||
|
type="password"
|
||||||
|
placeholder="Leave blank for local Ollama"
|
||||||
|
class="w-full bg-surface border border-surface-border rounded px-3 py-2 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent transition-colors"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
<button
|
<button
|
||||||
@click="saveLlm"
|
@click="saveLlm"
|
||||||
class="px-4 py-2 bg-accent text-surface text-sm rounded font-medium hover:opacity-90 transition-opacity"
|
class="px-4 py-2 bg-accent text-surface text-sm rounded font-medium hover:opacity-90 transition-opacity"
|
||||||
|
|
@ -171,10 +185,11 @@ interface Prefs {
|
||||||
entry_point_style: 'topbar' | 'fab'
|
entry_point_style: 'topbar' | 'fab'
|
||||||
llm_url: string
|
llm_url: string
|
||||||
llm_model: string
|
llm_model: string
|
||||||
|
llm_api_key: string
|
||||||
severity_overrides: SeverityOverride[]
|
severity_overrides: SeverityOverride[]
|
||||||
}
|
}
|
||||||
|
|
||||||
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '', severity_overrides: [] })
|
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '', llm_api_key: '', severity_overrides: [] })
|
||||||
const saveStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
const saveStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
||||||
const showAddOverride = ref(false)
|
const showAddOverride = ref(false)
|
||||||
const newRule = ref<SeverityOverride>({ name: '', pattern: '', override_severity: 'WARN', enabled: true })
|
const newRule = ref<SeverityOverride>({ name: '', pattern: '', override_severity: 'WARN', enabled: true })
|
||||||
|
|
@ -215,7 +230,7 @@ async function setEntryPoint(style: 'topbar' | 'fab') {
|
||||||
async function saveLlm() {
|
async function saveLlm() {
|
||||||
saveStatus.value = null
|
saveStatus.value = null
|
||||||
try {
|
try {
|
||||||
await patch({ llm_url: prefs.value.llm_url, llm_model: prefs.value.llm_model })
|
await patch({ llm_url: prefs.value.llm_url, llm_model: prefs.value.llm_model, llm_api_key: prefs.value.llm_api_key })
|
||||||
saveStatus.value = { ok: true, msg: 'LLM settings saved' }
|
saveStatus.value = { ok: true, msg: 'LLM settings saved' }
|
||||||
setTimeout(() => { saveStatus.value = null }, 2000)
|
setTimeout(() => { saveStatus.value = null }, 2000)
|
||||||
} catch {
|
} catch {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue