fix: strip reasoning-model thinking tags; surface untracked node names

- app/services/diagnose/_llm_client.py: strip <think>…</think> blocks (case-insensitive, multiline) from LLM response content before it reaches the UI or any JSON parser — affects DeepSeek-R1, Qwen QwQ, and any other model that emits chain-of-thought in content
- app/rest.py: suggest_sources now also returns untracked_names — query tokens that look like hostnames/service names but don't appear in any monitored source, so the UI can prompt the user to add them
- web/src/components/ChatDiagnose.vue: show amber "Not monitoring: X" banner with "Add as a log source →" link when untracked_names is present
- tests/test_llm_client.py: 13 tests covering think-strip edge cases (single/multi-line, multiple blocks, case-insensitive, only-thinking) plus the existing extract_content and JSON-fence helpers
2026-06-16 09:42:44 -07:00 · 2026-06-16 09:42:44 -07:00 · 4c1940d12e
commit 4c1940d12e
parent 6039ab2464
4 changed files with 143 additions and 5 deletions
--- a/app/rest.py
+++ b/app/rest.py
@ -555,24 +555,35 @@ _SUGGEST_STOPWORDS = frozenset({
@router.post("/api/sources/suggest")
 def suggest_sources(body: SourceSuggestRequest) -> dict:
-    """Return source IDs ranked by relevance to a natural-language problem description."""
+    """Return source IDs ranked by relevance to a natural-language problem description.
    Also returns ``untracked_names`` — query tokens that look like hostnames or
    service names but do not appear in any monitored source, so the UI can
    prompt the user to add them.
    """
    all_sources = _list_sources(DB_PATH)
    # Query tokens: a letter followed by letters, digits, "_" or "-";
    # lower-cased, longer than 2 characters, stopwords removed.
    # NOTE(review): source IDs below are split on ":", "-", "_" and digits, so a
    # query token such as "redis-01" stays whole here but can never equal a
    # source token ("redis"). It will not match any source and will end up in
    # untracked_names even when that source is monitored — confirm intended.
    query_tokens = {
        t.lower()
-        for t in re.findall(r"[a-zA-Z]+", body.query)
+        for t in re.findall(r"[a-zA-Z][a-zA-Z0-9_-]*", body.query)
        if len(t) > 2 and t.lower() not in _SUGGEST_STOPWORDS
    }
-    suggestions = []
+    # Build a flat set of every token present in any source ID
    all_source_tokens: set[str] = set()
    source_token_map: dict[str, set[str]] = {}
    for src in all_sources:
        src_id: str = src["source_id"]
        # Tokenise source ID: split on colon, dash, underscore, digits
        parts = {
            p.lower()
            for seg in re.split(r"[:\-_\d]+", src_id)
            for p in [seg.strip()]
            if len(p) > 2
        }
        source_token_map[src_id] = parts
        all_source_tokens |= parts
    suggestions = []
    for src_id, parts in source_token_map.items():
        matched = query_tokens & parts
        if matched:
            # Score = share of this source's tokens that occur in the query,
            # rounded to 3 decimals; max(..., 1) guards the division.
            score = round(len(matched) / max(len(parts), 1), 3)
@ -583,8 +594,13 @@ def suggest_sources(body: SourceSuggestRequest) -> dict:
            })
    # Highest score first.
    suggestions.sort(key=lambda x: x["score"], reverse=True)
    # Tokens that look like host/service names but aren't in any source
    # NOTE(review): this is a plain set difference — every non-stopword query
    # token missing from the source IDs is returned, with no check that it
    # actually resembles a hostname or service name. Verify against the UI copy.
    untracked = sorted(query_tokens - all_source_tokens)
    return {
        "suggested": suggestions,
        "untracked_names": untracked,
        "all_source_ids": [s["source_id"] for s in all_sources],
    }
--- a/app/services/diagnose/_llm_client.py
+++ b/app/services/diagnose/_llm_client.py
@ -23,16 +23,30 @@ _JSON_FENCE_RE = re.compile(
    re.MULTILINE,
 )
 # Reasoning models (DeepSeek-R1, Qwen QwQ, Llama thinking variants) embed
 # chain-of-thought inside <think>…</think> tags in the content field.
 # Strip them so only the final response reaches the UI.
 _THINK_TAG_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
 def _strip_thinking(text: str) -> str:
    """Remove <think>…</think> blocks and trim surrounding whitespace."""
    return _THINK_TAG_RE.sub("", text).strip()
 def extract_content(resp_json: dict) -> str | None:
    """Pull text content from an OpenAI-compat chat completion response.
    Only the first entry of ``choices`` is read.
    Strips reasoning-model thinking tags before returning.
    Returns None when the response has no choices, when the content is empty
    or whitespace-only, or when nothing is left after the thinking tags are
    stripped.
    """
    choices = resp_json.get("choices") or []
    if not choices:
        return None
    # NOTE(review): the ``{}`` default only applies when "message" is absent;
    # an explicit ``"message": null`` would raise AttributeError — confirm
    # whether any backend can send that.
-    return (choices[0].get("message", {}).get("content") or "").strip() or None
+    raw = (choices[0].get("message", {}).get("content") or "").strip()
    if not raw:
        return None
    # An all-thinking reply collapses to "" here, which is mapped to None.
    return _strip_thinking(raw) or None
 def strip_json_fences(raw: str) -> str:
--- a/tests/test_llm_client.py
+++ b/tests/test_llm_client.py
@ -0,0 +1,87 @@
 """Tests for diagnose/_llm_client.py — thinking-tag stripping and content extraction."""
 from __future__ import annotations
 import pytest
 def _resp(content: str | None) -> dict:
    if content is None:
        return {"choices": []}
    return {"choices": [{"message": {"content": content}}]}
 class TestExtractContent:
    """extract_content on plain, empty and think-tagged completion payloads."""
    def test_returns_plain_content(self):
        from app.services.diagnose._llm_client import extract_content
        payload = _resp("hello world")
        assert extract_content(payload) == "hello world"
    def test_returns_none_on_empty_choices(self):
        from app.services.diagnose._llm_client import extract_content
        payload = {"choices": []}
        assert extract_content(payload) is None
    def test_returns_none_on_empty_content(self):
        from app.services.diagnose._llm_client import extract_content
        payload = _resp("")
        assert extract_content(payload) is None
    def test_strips_single_think_block(self):
        from app.services.diagnose._llm_client import extract_content
        # One-line reasoning block followed by the real answer.
        text = "<think>Let me reason about this…</think>\nThe answer is 42."
        answer = extract_content(_resp(text))
        assert answer == "The answer is 42."
    def test_strips_multi_line_think_block(self):
        from app.services.diagnose._llm_client import extract_content
        # Reasoning block that spans several lines.
        text = "<think>\nStep 1: consider X\nStep 2: consider Y\n</think>\n\nFinal answer here."
        answer = extract_content(_resp(text))
        assert answer == "Final answer here."
        assert "<think>" not in answer
    def test_strips_multiple_think_blocks(self):
        from app.services.diagnose._llm_client import extract_content
        # Two separate blocks with visible text between and after them.
        text = "<think>first</think> actual <think>second</think> content"
        answer = extract_content(_resp(text))
        assert "<think>" not in answer
        assert "actual" in answer
        assert "content" in answer
    def test_strips_case_insensitive(self):
        from app.services.diagnose._llm_client import extract_content
        text = "<THINK>hidden</THINK> visible"
        answer = extract_content(_resp(text))
        assert answer == "visible"
    def test_returns_none_when_only_thinking_remains(self):
        from app.services.diagnose._llm_client import extract_content
        text = "<think>only thinking, no output</think>"
        answer = extract_content(_resp(text))
        assert answer is None
    def test_content_without_thinking_unchanged(self):
        from app.services.diagnose._llm_client import extract_content
        text = "Redis OOM at 03:00 — key eviction triggered by batch job."
        answer = extract_content(_resp(text))
        assert answer == text
 class TestStripJsonFences:
    """strip_json_fences on ```json-tagged and untagged fenced blocks."""
    def test_strips_json_fence(self):
        from app.services.diagnose._llm_client import strip_json_fences
        fenced = "```json\n[{\"a\": 1}]\n```"
        unfenced = strip_json_fences(fenced)
        assert unfenced == '[{"a": 1}]'
    def test_strips_plain_fence(self):
        from app.services.diagnose._llm_client import strip_json_fences
        fenced = "```\nhello\n```"
        unfenced = strip_json_fences(fenced)
        assert "```" not in unfenced
 class TestExtractFirstJsonArray:
    """extract_first_json_array with and without an embedded JSON array."""
    def test_extracts_array_from_mixed_text(self):
        import json
        from app.services.diagnose._llm_client import extract_first_json_array
        # Array is surrounded by prose on both sides.
        text = 'Here is the result:\n[{"id": 1}, {"id": 2}]\nThat is all.'
        items = json.loads(extract_first_json_array(text))
        assert len(items) == 2
    def test_returns_original_when_no_array(self):
        from app.services.diagnose._llm_client import extract_first_json_array
        text = "no array here"
        extracted = extract_first_json_array(text)
        assert extracted == text
--- a/web/src/components/ChatDiagnose.vue
+++ b/web/src/components/ChatDiagnose.vue
@ -137,6 +137,24 @@
      </div>
    </div>
    <!-- Untracked name nudge: renders each entry of untrackedNames as a chip
         and links to /sources. Hidden when the list is empty or while
         activeTurn is set. -->
    <div
      v-if="untrackedNames.length && !activeTurn"
      class="mb-3 p-3 rounded border border-yellow-700/40 bg-yellow-900/10"
    >
      <p class="text-xs text-yellow-400 mb-1">Not monitoring:
        <span
          v-for="name in untrackedNames"
          :key="name"
          class="font-mono ml-1 px-1.5 py-0.5 rounded bg-yellow-900/30 border border-yellow-700/30"
        >{{ name }}</span>
      </p>
      <RouterLink
        to="/sources"
        class="text-xs text-accent hover:underline"
      >Add as a log source →</RouterLink>
    </div>
    <!-- Input row -->
    <div class="border-t border-surface-border pt-3">
      <div class="flex gap-2 items-end">
@ -215,6 +233,7 @@ interface Turn {
 const turns            = ref<Turn[]>([])
 const draft            = ref('')
 const suggestedSources = ref<SuggestedSource[]>([])
 const untrackedNames   = ref<string[]>([])
 const excludedSources  = ref(new Set<string>())
 const activeTurn       = ref<Turn | null>(null)
 const scrollEl         = ref<HTMLElement | null>(null)
@ -237,6 +256,7 @@ function onInput() {
    suggestTimer = setTimeout(fetchSuggestions, 400)
  } else {
    suggestedSources.value = []
    untrackedNames.value = []
  }
 }
@ -250,6 +270,7 @@ async function fetchSuggestions() {
    if (!res.ok) return
    const data = await res.json()
    suggestedSources.value = (data.suggested ?? []).slice(0, 6)
    untrackedNames.value = data.untracked_names ?? []
    // Reset exclusions when suggestions change
    excludedSources.value = new Set()
  } catch { /* non-critical */ }