feat: LLM reasoning layer — Ollama summarization on diagnose results

This commit is contained in:
pyr0ball 2026-05-11 11:35:07 -07:00
parent e1abc1e73d
commit 0a4d877ba7
7 changed files with 253 additions and 24 deletions

View file

@ -62,13 +62,21 @@ def _startup() -> None:
ensure_schema(DB_PATH)
# Built-in preference values; anything saved on disk is layered on top of these.
_PREFS_DEFAULTS: dict[str, str] = {
    "entry_point_style": "topbar",
    "llm_url": "http://localhost:11434",
    "llm_model": "llama3.1:8b",
}


def _load_prefs() -> dict[str, str]:
    """Return the saved preferences merged over ``_PREFS_DEFAULTS``.

    Keys missing from the saved file fall back to their defaults, so prefs
    files written before a key existed keep working. If the file is absent,
    unreadable, not valid JSON, or valid JSON that is not an object, a fresh
    copy of the defaults is returned instead.

    Returns:
        A new dict on every call; mutating it never alters the defaults.
    """
    if PREFS_PATH.exists():
        try:
            saved = json.loads(PREFS_PATH.read_text())
        except (json.JSONDecodeError, OSError):
            saved = None
        # Only a JSON object can be merged; a list/number/null payload would
        # otherwise make the ``**saved`` unpacking raise TypeError.
        if isinstance(saved, dict):
            return {**_PREFS_DEFAULTS, **saved}
    return dict(_PREFS_DEFAULTS)
def _save_prefs(data: dict[str, str]) -> None:
@ -82,7 +90,9 @@ class DiagnoseRequest(BaseModel):
class SettingsBody(BaseModel):
    """Request body for a partial settings update.

    Every field is optional; a field left as ``None`` is not touched by the
    PATCH handler, so clients may send only the keys they want to change.
    """

    # UI entry point variant; the PATCH handler accepts 'topbar' or 'fab'.
    entry_point_style: str | None = None
    # Base URL of the Ollama server used for LLM summaries.
    llm_url: str | None = None
    # Model name passed to the LLM endpoint.
    llm_model: str | None = None
class IncidentCreate(BaseModel):
@ -202,9 +212,18 @@ def diagnose_post(body: DiagnoseRequest) -> dict:
},
"entries": [],
}
result = _diagnose(DB_PATH, query=body.query, since=body.since, until=body.until)
prefs = _load_prefs()
result = _diagnose(
DB_PATH,
query=body.query,
since=body.since,
until=body.until,
llm_url=prefs.get("llm_url") or None,
llm_model=prefs.get("llm_model") or None,
)
return {
"summary": result["summary"],
"reasoning": result.get("reasoning"),
"entries": [dataclasses.asdict(r) for r in result["entries"]],
}
@ -216,10 +235,15 @@ def get_settings() -> dict:
@router.patch("/api/settings")
def patch_settings(body: SettingsBody) -> dict:
    """Apply a partial settings update and return the full, saved preferences.

    Only fields present in the request (not ``None``) are changed.

    Raises:
        HTTPException: 422 when ``entry_point_style`` is given but is neither
            'topbar' nor 'fab'. Nothing is saved in that case.
    """
    prefs = _load_prefs()
    if body.entry_point_style is not None:
        if body.entry_point_style not in ("topbar", "fab"):
            raise HTTPException(status_code=422, detail="entry_point_style must be 'topbar' or 'fab'")
        prefs["entry_point_style"] = body.entry_point_style
    # Strip surrounding whitespace: a pasted URL with a trailing space would
    # otherwise produce an invalid request URL, and a whitespace-only value
    # would count as "configured" instead of blank.
    if body.llm_url is not None:
        prefs["llm_url"] = body.llm_url.strip()
    if body.llm_model is not None:
        prefs["llm_model"] = body.llm_model.strip()
    _save_prefs(prefs)
    return prefs

View file

@ -7,6 +7,7 @@ from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
from app.services.llm import summarize
from app.services.search import SearchResult, entries_in_window, search
logger = logging.getLogger(__name__)
@ -48,6 +49,8 @@ def diagnose(
query: str,
since: str | None = None,
until: str | None = None,
llm_url: str | None = None,
llm_model: str | None = None,
) -> dict[str, Any]:
"""Run layered log search with NL time extraction. Returns summary + entries."""
time_detected = since is not None and until is not None
@ -79,6 +82,10 @@ def diagnose(
by_severity[sev] += 1
by_source[r.source_id] = by_source.get(r.source_id, 0) + 1
reasoning: str | None = None
if llm_url and llm_model:
reasoning = summarize(query, combined, llm_url=llm_url, llm_model=llm_model)
return {
"summary": {
"total": len(combined),
@ -88,6 +95,7 @@ def diagnose(
"by_severity": by_severity,
"by_source": by_source,
},
"reasoning": reasoning,
"entries": combined,
}

56
app/services/llm.py Normal file
View file

@ -0,0 +1,56 @@
import logging
import httpx
from app.services.search import SearchResult
logger = logging.getLogger(__name__)
# Sort rank per severity label (lower sorts first); labels not listed here are
# ranked after all of these by the context builder.
_SEVERITY_RANK = {"CRITICAL": 0, "ERROR": 1, "WARN": 2, "WARNING": 2}

# Prompt sent to the LLM. Placeholders: {query} (user text), {n} (number of
# log lines included), {log_block} (the formatted log lines).
_PROMPT_TEMPLATE = """\
You are a homelab diagnostic assistant. A user described a symptom and the system retrieved relevant log entries.
Analyze the log entries below and write a 2-4 sentence plain-language diagnosis. Focus on errors and their likely root cause. Be specific and concise: name the services involved, not generic platitudes.
User query: {query}
Log entries ({n} shown, highest severity first):
{log_block}
Diagnosis:"""
def _build_context(entries: list[SearchResult], max_entries: int = 25) -> str:
    """Format the most severe log entries as one line each for the LLM prompt.

    Entries are ordered by severity rank (per ``_SEVERITY_RANK``; unknown or
    missing severities last), then by ``timestamp_iso`` string ascending, and
    only the first ``max_entries`` are kept.

    Args:
        entries: Search results to format.
        max_entries: Maximum number of lines to emit.

    Returns:
        Newline-joined lines of the form ``[timestamp] [severity] text``, with
        ``?`` for a missing timestamp and ``INFO`` for a missing severity.
    """

    def sort_key(entry: SearchResult) -> tuple[int, str]:
        # Upper-case before lookup so "error"/"Critical" rank like their
        # upper-case forms instead of falling through to the lowest priority.
        rank = _SEVERITY_RANK.get((entry.severity or "").upper(), 3)
        return rank, entry.timestamp_iso or ""

    ranked = sorted(entries, key=sort_key)[:max_entries]
    return "\n".join(
        # Collapse whitespace (incl. newlines) so a multi-line message cannot
        # break the one-entry-per-line layout; then cap at 200 characters.
        f"[{e.timestamp_iso or '?'}] [{e.severity or 'INFO'}] "
        f"{' '.join((e.text or '').split())[:200]}"
        for e in ranked
    )
def summarize(
    query: str,
    entries: list[SearchResult],
    llm_url: str,
    llm_model: str,
    timeout: float = 20.0,
    max_entries: int = 25,
) -> str | None:
    """Ask the configured LLM for a short diagnosis of the given log entries.

    Sends a non-streaming POST to ``{llm_url}/api/generate`` with a prompt
    built from ``query`` and the highest-severity entries. This is strictly
    best-effort: any failure is logged as a warning and reported as ``None``
    rather than raised.

    Args:
        query: The user's symptom description.
        entries: Log entries to summarize.
        llm_url: Base URL of the LLM server (a trailing slash is tolerated).
        llm_model: Model name sent in the request payload.
        timeout: Request timeout passed to ``httpx.post``.
        max_entries: Maximum number of entries included in the prompt.

    Returns:
        The stripped ``response`` text, or ``None`` when there is nothing to
        summarize, the response is empty, or the request fails.
    """
    if not entries or max_entries <= 0:
        return None
    try:
        # Prompt construction lives inside the try so a malformed entry cannot
        # turn an optional summary into a failed diagnose call.
        log_block = _build_context(entries, max_entries)
        prompt = _PROMPT_TEMPLATE.format(
            query=query,
            n=min(len(entries), max_entries),
            log_block=log_block,
        )
        resp = httpx.post(
            f"{llm_url.rstrip('/')}/api/generate",
            json={"model": llm_model, "prompt": prompt, "stream": False},
            timeout=timeout,
        )
        resp.raise_for_status()
        # ``or ""`` also covers an explicit JSON null in the response field.
        text = resp.json().get("response") or ""
        return text.strip() or None
    except Exception as exc:  # deliberate catch-all: the summary is optional
        logger.warning("LLM summarization failed (%s): %s", type(exc).__name__, exc)
        return None

View file

@ -5,3 +5,4 @@ pyyaml>=6.0
aiofiles>=23.0.0
python-multipart>=0.0.9
dateparser>=1.2.0
httpx>=0.27.0

View file

@ -0,0 +1,71 @@
"""Tests for app/services/llm.py — graceful failure and context building."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
from app.services.llm import summarize, _build_context
from app.services.search import SearchResult
def _entry(text: str, severity: str = "INFO", ts: str = "2026-05-06T21:00:00+00:00") -> SearchResult:
    """Build a SearchResult fixture; only text, severity and timestamp vary."""
    fixed_fields = dict(
        entry_id="x",
        source_id="svc",
        sequence=0,
        matched_patterns=[],
        repeat_count=1,
        out_of_order=False,
        rank=0.0,
    )
    return SearchResult(timestamp_iso=ts, severity=severity, text=text, **fixed_fields)
def test_summarize_returns_none_on_connection_error():
    """A transport-level failure is swallowed and reported as None."""
    refused = ConnectionError("refused")
    log_entries = [_entry("failed")]
    with patch("app.services.llm.httpx.post", side_effect=refused):
        outcome = summarize("ollama crashed", log_entries, "http://bad", "llama3")
    assert outcome is None
def test_summarize_returns_none_on_http_error():
    """An error raised by raise_for_status() yields None."""
    failing_resp = MagicMock()
    failing_resp.raise_for_status.side_effect = Exception("404")
    log_entries = [_entry("failed")]
    with patch("app.services.llm.httpx.post", return_value=failing_resp):
        outcome = summarize("ollama crashed", log_entries, "http://host", "llama3")
    assert outcome is None
def test_summarize_returns_none_on_empty_response():
    """An empty 'response' field is normalized to None."""
    blank_resp = MagicMock()
    blank_resp.raise_for_status.return_value = None
    blank_resp.json.return_value = {"response": ""}
    log_entries = [_entry("x")]
    with patch("app.services.llm.httpx.post", return_value=blank_resp):
        outcome = summarize("query", log_entries, "http://host", "llama3")
    assert outcome is None
def test_summarize_returns_text_on_success():
    """The model's 'response' text is returned on a successful call."""
    expected = "Ollama exited with code 1."
    ok_resp = MagicMock()
    ok_resp.raise_for_status.return_value = None
    ok_resp.json.return_value = {"response": "Ollama exited with code 1."}
    log_entries = [_entry("Failed")]
    with patch("app.services.llm.httpx.post", return_value=ok_resp):
        outcome = summarize("ollama crashed", log_entries, "http://host", "llama3")
    assert outcome == expected
def test_build_context_sorts_errors_first():
    """Context lines are ordered CRITICAL, WARN, INFO and honor max_entries."""
    entries = [
        _entry("info message", severity="INFO"),
        _entry("critical crash", severity="CRITICAL"),
        _entry("warn spike", severity="WARN"),
    ]
    ctx = _build_context(entries)
    lines = ctx.splitlines()
    # One line per entry, most severe first, unranked severities last.
    assert len(lines) == 3
    assert "CRITICAL" in lines[0]
    assert "WARN" in lines[1]
    assert "INFO" in lines[2]

    # Truncation must keep the most severe entry, not the first one supplied.
    top_only = _build_context(entries, max_entries=1).splitlines()
    assert len(top_only) == 1
    assert "CRITICAL" in top_only[0]
def test_summarize_empty_entries_returns_none():
    """With no entries there is nothing to summarize, so None is returned."""
    no_entries: list = []
    assert summarize("query", no_entries, "http://host", "model") is None

View file

@ -45,6 +45,18 @@
</div>
</div>
<!-- LLM reasoning card -->
<div
v-if="reasoning"
class="mb-4 rounded border border-accent/30 bg-accent/5 p-4"
>
<div class="flex items-center gap-2 mb-2 text-xs text-text-dim font-medium uppercase tracking-wide">
<span></span>
<span>Diagnosis</span>
</div>
<p class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap">{{ reasoning }}</p>
</div>
<!-- Log stream -->
<div v-if="entries.length" class="rounded border border-surface-border overflow-hidden mb-4">
<LogEntryRow v-for="entry in entries" :key="entry.entry_id" :entry="entry" />
@ -140,6 +152,7 @@ interface Summary {
const query = ref('')
const entries = ref<LogEntry[]>([])
const summary = ref<Summary | null>(null)
const reasoning = ref<string | null>(null)
const loading = ref(false)
const error = ref<string | null>(null)
const ranOnce = ref(false)
@ -185,6 +198,7 @@ async function run() {
const data = await res.json()
entries.value = data.entries
summary.value = data.summary
reasoning.value = data.reasoning ?? null
capturedSince = data.summary.window_start
capturedUntil = data.summary.window_end
} catch (e) {

View file

@ -8,6 +8,7 @@
</div>
<div class="rounded border border-surface-border bg-surface-raised p-5 space-y-6">
<!-- Entry point -->
<div>
<h2 class="text-text-primary text-sm font-semibold mb-1">Quick Capture Entry Point</h2>
<p class="text-text-dim text-xs mb-3">
@ -29,16 +30,51 @@
<div class="text-xs text-text-dim mt-0.5">{{ opt.desc }}</div>
</button>
</div>
</div>
<!-- LLM config -->
<div>
<h2 class="text-text-primary text-sm font-semibold mb-1">LLM Reasoning</h2>
<p class="text-text-dim text-xs mb-3">
Ollama endpoint used to generate plain-language diagnoses. Leave blank to disable.
</p>
<div class="space-y-3">
<div>
<label class="block text-xs text-text-dim mb-1">Ollama URL</label>
<input
v-model="prefs.llm_url"
type="text"
placeholder="http://localhost:11434"
class="w-full bg-surface border border-surface-border rounded px-3 py-2 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent transition-colors"
/>
</div>
<div>
<label class="block text-xs text-text-dim mb-1">Model</label>
<input
v-model="prefs.llm_model"
type="text"
placeholder="llama3.1:8b"
class="w-full bg-surface border border-surface-border rounded px-3 py-2 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent transition-colors"
/>
</div>
<button
@click="saveLlm"
class="px-4 py-2 bg-accent text-surface text-sm rounded font-medium hover:opacity-90 transition-opacity"
>
Save LLM settings
</button>
</div>
</div>
<p
v-if="saveStatus"
class="text-xs mt-2"
class="text-xs"
:class="saveStatus.ok ? 'text-green-400' : 'text-sev-error'"
>
{{ saveStatus.msg }}
</p>
</div>
</div>
</div>
</template>
<script setup lang="ts">
@ -46,9 +82,13 @@ import { ref, onMounted } from 'vue'
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
// Shape of the settings object exchanged with the /api/settings endpoint.
interface Prefs {
  entry_point_style: 'topbar' | 'fab'
  llm_url: string
  llm_model: string
}

// Local placeholder values, replaced by the server's settings once loaded.
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '' })
const saveStatus = ref<{ ok: boolean; msg: string } | null>(null)
const entryPointOptions = [
@ -60,23 +100,38 @@ onMounted(async () => {
try {
const res = await fetch(`${BASE}/api/settings`)
if (res.ok) prefs.value = await res.json()
} catch { /* non-critical — default stays topbar */ }
} catch { /* non-critical — defaults stay */ }
})
async function setEntryPoint(style: 'topbar' | 'fab') {
prefs.value = { entry_point_style: style }
saveStatus.value = null
try {
// Send a partial settings update and adopt the settings object the server
// returns. Throws (with the response text) when the response is not OK, so
// callers can report a failed save.
async function patch(body: Partial<Prefs>) {
  const res = await fetch(`${BASE}/api/settings`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })
  if (!res.ok) throw new Error(await res.text())
  prefs.value = await res.json()
}
// Persist the chosen entry point style and show a transient save status.
async function setEntryPoint(style: 'topbar' | 'fab') {
  saveStatus.value = null
  let succeeded = false
  try {
    await patch({ entry_point_style: style })
    succeeded = true
  } catch {
    saveStatus.value = { ok: false, msg: 'Save failed — check server connection' }
  }
  if (succeeded) {
    saveStatus.value = { ok: true, msg: 'Saved' }
    // Success message clears itself after two seconds.
    setTimeout(() => { saveStatus.value = null }, 2000)
  }
}
// Persist the LLM URL and model from the form and show a transient save status.
async function saveLlm() {
  saveStatus.value = null
  let succeeded = false
  try {
    const { llm_url, llm_model } = prefs.value
    await patch({ llm_url, llm_model })
    succeeded = true
  } catch {
    saveStatus.value = { ok: false, msg: 'Save failed — check server connection' }
  }
  if (succeeded) {
    saveStatus.value = { ok: true, msg: 'LLM settings saved' }
    // Success message clears itself after two seconds.
    setTimeout(() => { saveStatus.value = null }, 2000)
  }
}
</script>