fix: strip reasoning-model thinking tags; surface untracked node names

- app/services/diagnose/_llm_client.py: strip <think>…</think> blocks
  (case-insensitive, multiline) from LLM response content before it
  reaches the UI or any JSON parser — affects DeepSeek-R1, Qwen QwQ,
  and any other model that emits chain-of-thought in content
- app/rest.py: suggest_sources now also returns untracked_names — query
  tokens that look like hostnames/service names but don't appear in any
  monitored source, so the UI can prompt the user to add them
- web/src/components/ChatDiagnose.vue: show amber "Not monitoring: X"
  banner with "Add as a log source →" link when untracked_names present
- tests/test_llm_client.py: 13 tests covering think-strip edge cases
  (single/multi-line, multiple blocks, case-insensitive, only-thinking)
  plus existing extract_content and JSON-fence helpers
This commit is contained in:
pyr0ball 2026-06-16 09:42:44 -07:00
parent 6039ab2464
commit 4c1940d12e
4 changed files with 143 additions and 5 deletions

View file

@ -555,24 +555,35 @@ _SUGGEST_STOPWORDS = frozenset({
@router.post("/api/sources/suggest") @router.post("/api/sources/suggest")
def suggest_sources(body: SourceSuggestRequest) -> dict: def suggest_sources(body: SourceSuggestRequest) -> dict:
"""Return source IDs ranked by relevance to a natural-language problem description.""" """Return source IDs ranked by relevance to a natural-language problem description.
Also returns ``untracked_names`` — query tokens that look like hostnames or
service names but do not appear in any monitored source, so the UI can
prompt the user to add them.
"""
all_sources = _list_sources(DB_PATH) all_sources = _list_sources(DB_PATH)
query_tokens = { query_tokens = {
t.lower() t.lower()
for t in re.findall(r"[a-zA-Z]+", body.query) for t in re.findall(r"[a-zA-Z][a-zA-Z0-9_-]*", body.query)
if len(t) > 2 and t.lower() not in _SUGGEST_STOPWORDS if len(t) > 2 and t.lower() not in _SUGGEST_STOPWORDS
} }
suggestions = [] # Build a flat set of every token present in any source ID
all_source_tokens: set[str] = set()
source_token_map: dict[str, set[str]] = {}
for src in all_sources: for src in all_sources:
src_id: str = src["source_id"] src_id: str = src["source_id"]
# Tokenise source ID: split on colon, dash, underscore, digits
parts = { parts = {
p.lower() p.lower()
for seg in re.split(r"[:\-_\d]+", src_id) for seg in re.split(r"[:\-_\d]+", src_id)
for p in [seg.strip()] for p in [seg.strip()]
if len(p) > 2 if len(p) > 2
} }
source_token_map[src_id] = parts
all_source_tokens |= parts
suggestions = []
for src_id, parts in source_token_map.items():
matched = query_tokens & parts matched = query_tokens & parts
if matched: if matched:
score = round(len(matched) / max(len(parts), 1), 3) score = round(len(matched) / max(len(parts), 1), 3)
@ -583,8 +594,13 @@ def suggest_sources(body: SourceSuggestRequest) -> dict:
}) })
suggestions.sort(key=lambda x: x["score"], reverse=True) suggestions.sort(key=lambda x: x["score"], reverse=True)
# Tokens that look like host/service names but aren't in any source
untracked = sorted(query_tokens - all_source_tokens)
return { return {
"suggested": suggestions, "suggested": suggestions,
"untracked_names": untracked,
"all_source_ids": [s["source_id"] for s in all_sources], "all_source_ids": [s["source_id"] for s in all_sources],
} }

View file

@ -23,16 +23,30 @@ _JSON_FENCE_RE = re.compile(
re.MULTILINE, re.MULTILINE,
) )
# Reasoning models (DeepSeek-R1, Qwen QwQ, Llama thinking variants) embed
# chain-of-thought inside <think>…</think> tags in the content field.
# Strip them so only the final response reaches the UI.
_THINK_TAG_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)

# A closing tag with no opener left after balanced blocks are removed, e.g.
# when a chat template supplies the opening tag as part of the prompt so the
# completion starts mid-reasoning. Greedy on purpose: everything up to the
# LAST stray closer is treated as reasoning.
_THINK_ORPHAN_CLOSE_RE = re.compile(r"\A.*</think>", re.DOTALL | re.IGNORECASE)

# An opening tag that is never closed, e.g. generation stopped (token limit)
# while the model was still reasoning. Everything from the opener on is dropped.
_THINK_UNCLOSED_RE = re.compile(r"<think>.*\Z", re.DOTALL | re.IGNORECASE)


def _strip_thinking(text: str) -> str:
    """Remove reasoning-model thinking output and trim surrounding whitespace.

    Three shapes are handled, in this order:

    1. Balanced ``<think>…</think>`` blocks (any number, any case, multiline)
       are deleted.
    2. If a ``</think>`` is still present it has no opener; the text from the
       start through the last such closer is deleted.
    3. If a ``<think>`` is still present it has no closer; the text from that
       opener to the end is deleted.

    Args:
        text: Raw content string from the model.

    Returns:
        The content with thinking removed and outer whitespace stripped.
        May be the empty string when nothing but thinking was present.
    """
    # Balanced blocks first, so steps 2 and 3 only ever see unmatched tags.
    cleaned = _THINK_TAG_RE.sub("", text)
    cleaned = _THINK_ORPHAN_CLOSE_RE.sub("", cleaned, count=1)
    cleaned = _THINK_UNCLOSED_RE.sub("", cleaned, count=1)
    return cleaned.strip()
def extract_content(resp_json: dict) -> str | None: def extract_content(resp_json: dict) -> str | None:
"""Pull text content from an OpenAI-compat chat completion response. """Pull text content from an OpenAI-compat chat completion response.
Strips reasoning-model thinking tags before returning.
Returns None when the response has no choices or empty content. Returns None when the response has no choices or empty content.
""" """
choices = resp_json.get("choices") or [] choices = resp_json.get("choices") or []
if not choices: if not choices:
return None return None
return (choices[0].get("message", {}).get("content") or "").strip() or None raw = (choices[0].get("message", {}).get("content") or "").strip()
if not raw:
return None
return _strip_thinking(raw) or None
def strip_json_fences(raw: str) -> str: def strip_json_fences(raw: str) -> str:

87
tests/test_llm_client.py Normal file
View file

@ -0,0 +1,87 @@
"""Tests for diagnose/_llm_client.py — thinking-tag stripping and content extraction."""
from __future__ import annotations
import pytest
def _resp(content: str | None) -> dict:
    """Build a minimal chat-completion-shaped dict for the tests.

    ``None`` yields a response with an empty ``choices`` list; any string
    (including the empty string) yields a single choice whose message
    carries that string as ``content``.
    """
    choices = [] if content is None else [{"message": {"content": content}}]
    return {"choices": choices}
class TestExtractContent:
    """Checks for ``extract_content``: passthrough, empty input, think-tag removal."""

    def test_returns_plain_content(self):
        """Tag-free content is returned as-is."""
        from app.services.diagnose._llm_client import extract_content

        payload = _resp("hello world")
        assert extract_content(payload) == "hello world"

    def test_returns_none_on_empty_choices(self):
        """A response with no choices yields ``None``."""
        from app.services.diagnose._llm_client import extract_content

        payload = {"choices": []}
        assert extract_content(payload) is None

    def test_returns_none_on_empty_content(self):
        """An empty content string yields ``None``."""
        from app.services.diagnose._llm_client import extract_content

        payload = _resp("")
        assert extract_content(payload) is None

    def test_strips_single_think_block(self):
        """One leading think block is removed along with the following newline."""
        from app.services.diagnose._llm_client import extract_content

        reply = "<think>Let me reason about this…</think>\nThe answer is 42."
        assert extract_content(_resp(reply)) == "The answer is 42."

    def test_strips_multi_line_think_block(self):
        """A think block spanning several lines is removed in full."""
        from app.services.diagnose._llm_client import extract_content

        reply = "<think>\nStep 1: consider X\nStep 2: consider Y\n</think>\n\nFinal answer here."
        cleaned = extract_content(_resp(reply))
        assert cleaned == "Final answer here."
        assert "<think>" not in cleaned

    def test_strips_multiple_think_blocks(self):
        """Every think block is removed; text between and after them survives."""
        from app.services.diagnose._llm_client import extract_content

        reply = "<think>first</think> actual <think>second</think> content"
        cleaned = extract_content(_resp(reply))
        assert "<think>" not in cleaned
        for kept in ("actual", "content"):
            assert kept in cleaned

    def test_strips_case_insensitive(self):
        """Upper-case tags are stripped just like lower-case ones."""
        from app.services.diagnose._llm_client import extract_content

        reply = "<THINK>hidden</THINK> visible"
        cleaned = extract_content(_resp(reply))
        assert cleaned == "visible"

    def test_returns_none_when_only_thinking_remains(self):
        """Content that is nothing but a think block collapses to ``None``."""
        from app.services.diagnose._llm_client import extract_content

        reply = "<think>only thinking, no output</think>"
        assert extract_content(_resp(reply)) is None

    def test_content_without_thinking_unchanged(self):
        """Ordinary prose (including an em dash) passes through unchanged."""
        from app.services.diagnose._llm_client import extract_content

        reply = "Redis OOM at 03:00 — key eviction triggered by batch job."
        assert extract_content(_resp(reply)) == reply
class TestStripJsonFences:
    """Checks for ``strip_json_fences`` on fenced input."""

    def test_strips_json_fence(self):
        """A ```json fence is removed, leaving only the inner JSON text."""
        from app.services.diagnose._llm_client import strip_json_fences

        fenced = "```json\n[{\"a\": 1}]\n```"
        unfenced = strip_json_fences(fenced)
        assert unfenced == '[{"a": 1}]'

    def test_strips_plain_fence(self):
        """A fence without a language tag leaves no backtick run behind."""
        from app.services.diagnose._llm_client import strip_json_fences

        fenced = "```\nhello\n```"
        unfenced = strip_json_fences(fenced)
        assert "```" not in unfenced
class TestExtractFirstJsonArray:
    """Checks for ``extract_first_json_array``."""

    def test_extracts_array_from_mixed_text(self):
        """The array embedded in surrounding prose is returned as parseable JSON."""
        import json

        from app.services.diagnose._llm_client import extract_first_json_array

        mixed = 'Here is the result:\n[{"id": 1}, {"id": 2}]\nThat is all.'
        extracted = extract_first_json_array(mixed)
        items = json.loads(extracted)
        assert len(items) == 2

    def test_returns_original_when_no_array(self):
        """Input containing no array is handed back unchanged."""
        from app.services.diagnose._llm_client import extract_first_json_array

        text = "no array here"
        assert extract_first_json_array(text) == text

View file

@ -137,6 +137,24 @@
</div> </div>
</div> </div>
<!-- Untracked name nudge -->
<div
v-if="untrackedNames.length && !activeTurn"
class="mb-3 p-3 rounded border border-yellow-700/40 bg-yellow-900/10"
>
<p class="text-xs text-yellow-400 mb-1">Not monitoring:
<span
v-for="name in untrackedNames"
:key="name"
class="font-mono ml-1 px-1.5 py-0.5 rounded bg-yellow-900/30 border border-yellow-700/30"
>{{ name }}</span>
</p>
<RouterLink
to="/sources"
class="text-xs text-accent hover:underline"
>Add as a log source </RouterLink>
</div>
<!-- Input row --> <!-- Input row -->
<div class="border-t border-surface-border pt-3"> <div class="border-t border-surface-border pt-3">
<div class="flex gap-2 items-end"> <div class="flex gap-2 items-end">
@ -215,6 +233,7 @@ interface Turn {
const turns = ref<Turn[]>([]) const turns = ref<Turn[]>([])
const draft = ref('') const draft = ref('')
const suggestedSources = ref<SuggestedSource[]>([]) const suggestedSources = ref<SuggestedSource[]>([])
const untrackedNames = ref<string[]>([])
const excludedSources = ref(new Set<string>()) const excludedSources = ref(new Set<string>())
const activeTurn = ref<Turn | null>(null) const activeTurn = ref<Turn | null>(null)
const scrollEl = ref<HTMLElement | null>(null) const scrollEl = ref<HTMLElement | null>(null)
@ -237,6 +256,7 @@ function onInput() {
suggestTimer = setTimeout(fetchSuggestions, 400) suggestTimer = setTimeout(fetchSuggestions, 400)
} else { } else {
suggestedSources.value = [] suggestedSources.value = []
untrackedNames.value = []
} }
} }
@ -250,6 +270,7 @@ async function fetchSuggestions() {
if (!res.ok) return if (!res.ok) return
const data = await res.json() const data = await res.json()
suggestedSources.value = (data.suggested ?? []).slice(0, 6) suggestedSources.value = (data.suggested ?? []).slice(0, 6)
untrackedNames.value = data.untracked_names ?? []
// Reset exclusions when suggestions change // Reset exclusions when suggestions change
excludedSources.value = new Set() excludedSources.value = new Set()
} catch { /* non-critical */ } } catch { /* non-critical */ }