From 4c1940d12ef136f03aa7024a20b19080f9ebdca3 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Tue, 16 Jun 2026 09:42:44 -0700
Subject: [PATCH] fix: strip reasoning-model thinking tags; surface untracked
 node names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- app/services/diagnose/_llm_client.py: strip <think>…</think> blocks
  (case-insensitive, multiline) from LLM response content before it
  reaches the UI or any JSON parser — affects DeepSeek-R1, Qwen QwQ,
  and any other model that emits chain-of-thought in content
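- app/rest.py: suggest_sources now also returns untracked_names — query
  tokens (non-stopword, longer than 2 chars; the query tokenizer now
  also keeps digits, "-" and "_" after a leading letter) that do not
  match any token of a monitored source ID. These are treated as
  candidate hostnames/service names so the UI can prompt the user to
  add them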
- web/src/components/ChatDiagnose.vue: show amber "Not monitoring: X"
  banner with "Add as a log source →" link when untracked_names present
- tests/test_llm_client.py: 13 tests covering think-strip edge cases
  (single/multi-line, multiple blocks, case-insensitive, only-thinking)
  plus existing extract_content and JSON-fence helpers
---
 app/rest.py                          | 24 ++++++--
 app/services/diagnose/_llm_client.py | 16 ++++-
 tests/test_llm_client.py             | 87 ++++++++++++++++++++++++++++
 web/src/components/ChatDiagnose.vue  | 21 +++++++
 4 files changed, 143 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_llm_client.py
diff --git a/app/rest.py b/app/rest.py
index 35c5e2d..ad9ddda 100644
--- a/app/rest.py
+++ b/app/rest.py
@@ -555,24 +555,35 @@ _SUGGEST_STOPWORDS = frozenset({
 
 @router.post("/api/sources/suggest")
 def suggest_sources(body: SourceSuggestRequest) -> dict:
-    """Return source IDs ranked by relevance to a natural-language problem description."""
+    """Return source IDs ranked by relevance to a natural-language problem description.
+
+    Also returns ``untracked_names`` — query tokens that look like hostnames or
+    service names but do not appear in any monitored source, so the UI can
+    prompt the user to add them.
+    """
     all_sources = _list_sources(DB_PATH)
     query_tokens = {
         t.lower()
-        for t in re.findall(r"[a-zA-Z]+", body.query)
+        for t in re.findall(r"[a-zA-Z][a-zA-Z0-9_-]*", body.query)
         if len(t) > 2 and t.lower() not in _SUGGEST_STOPWORDS
     }
 
-    suggestions = []
+    # Build a flat set of every token present in any source ID
+    all_source_tokens: set[str] = set()
+    source_token_map: dict[str, set[str]] = {}
     for src in all_sources:
         src_id: str = src["source_id"]
-        # Tokenise source ID: split on colon, dash, underscore, digits
         parts = {
             p.lower()
             for seg in re.split(r"[:\-_\d]+", src_id)
             for p in [seg.strip()]
             if len(p) > 2
         }
+        source_token_map[src_id] = parts
+        all_source_tokens |= parts
+
+    suggestions = []
+    for src_id, parts in source_token_map.items():
         matched = query_tokens & parts
         if matched:
             score = round(len(matched) / max(len(parts), 1), 3)
@@ -583,8 +594,13 @@ def suggest_sources(body: SourceSuggestRequest) -> dict:
             })
 
     suggestions.sort(key=lambda x: x["score"], reverse=True)
+
+    # Tokens that look like host/service names but aren't in any source
+    untracked = sorted(query_tokens - all_source_tokens)
+
     return {
         "suggested": suggestions,
+        "untracked_names": untracked,
         "all_source_ids": [s["source_id"] for s in all_sources],
     }
 
diff --git a/app/services/diagnose/_llm_client.py b/app/services/diagnose/_llm_client.py
index 87e695d..bb3e212 100644
--- a/app/services/diagnose/_llm_client.py
+++ b/app/services/diagnose/_llm_client.py
@@ -23,16 +23,30 @@ _JSON_FENCE_RE = re.compile(
     re.MULTILINE,
 )
 
+# Reasoning models (DeepSeek-R1, Qwen QwQ, Llama thinking variants) embed
+# chain-of-thought inside <think>…</think> tags in the content field.
+# Strip them so only the final response reaches the UI.
+_THINK_TAG_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
+
+
+def _strip_thinking(text: str) -> str:
+    """Remove <think>…</think> blocks and trim surrounding whitespace."""
+    return _THINK_TAG_RE.sub("", text).strip()
+
 
 def extract_content(resp_json: dict) -> str | None:
     """Pull text content from an OpenAI-compat chat completion response.
 
+    Strips reasoning-model thinking tags before returning.
     Returns None when the response has no choices or empty content.
     """
     choices = resp_json.get("choices") or []
     if not choices:
         return None
-    return (choices[0].get("message", {}).get("content") or "").strip() or None
+    raw = (choices[0].get("message", {}).get("content") or "").strip()
+    if not raw:
+        return None
+    return _strip_thinking(raw) or None
 
 
 def strip_json_fences(raw: str) -> str:
diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py
new file mode 100644
index 0000000..4b13b31
--- /dev/null
+++ b/tests/test_llm_client.py
@@ -0,0 +1,87 @@
+"""Tests for diagnose/_llm_client.py — thinking-tag stripping and content extraction."""
+from __future__ import annotations
+
+import pytest
+
+
+def _resp(content: str | None) -> dict:
+    if content is None:
+        return {"choices": []}
+    return {"choices": [{"message": {"content": content}}]}
+
+
+class TestExtractContent:
+    def test_returns_plain_content(self):
+        from app.services.diagnose._llm_client import extract_content
+        assert extract_content(_resp("hello world")) == "hello world"
+
+    def test_returns_none_on_empty_choices(self):
+        from app.services.diagnose._llm_client import extract_content
+        assert extract_content({"choices": []}) is None
+
+    def test_returns_none_on_empty_content(self):
+        from app.services.diagnose._llm_client import extract_content
+        assert extract_content(_resp("")) is None
+
+    def test_strips_single_think_block(self):
+        from app.services.diagnose._llm_client import extract_content
+        raw = "<think>Let me reason about this…</think>\nThe answer is 42."
+        assert extract_content(_resp(raw)) == "The answer is 42."
+
+    def test_strips_multi_line_think_block(self):
+        from app.services.diagnose._llm_client import extract_content
+        raw = "<think>\nStep 1: consider X\nStep 2: consider Y\n</think>\n\nFinal answer here."
+        result = extract_content(_resp(raw))
+        assert result == "Final answer here."
+        assert "<think>" not in result
+
+    def test_strips_multiple_think_blocks(self):
+        from app.services.diagnose._llm_client import extract_content
+        raw = "<think>first</think> actual <think>second</think> content"
+        result = extract_content(_resp(raw))
+        assert "<think>" not in result
+        assert "actual" in result
+        assert "content" in result
+
+    def test_strips_case_insensitive(self):
+        from app.services.diagnose._llm_client import extract_content
+        raw = "<THINK>hidden</THINK> visible"
+        result = extract_content(_resp(raw))
+        assert result == "visible"
+
+    def test_returns_none_when_only_thinking_remains(self):
+        from app.services.diagnose._llm_client import extract_content
+        raw = "<think>only thinking, no output</think>"
+        assert extract_content(_resp(raw)) is None
+
+    def test_content_without_thinking_unchanged(self):
+        from app.services.diagnose._llm_client import extract_content
+        raw = "Redis OOM at 03:00 — key eviction triggered by batch job."
+        assert extract_content(_resp(raw)) == raw
+
+
+class TestStripJsonFences:
+    def test_strips_json_fence(self):
+        from app.services.diagnose._llm_client import strip_json_fences
+        raw = "```json\n[{\"a\": 1}]\n```"
+        assert strip_json_fences(raw) == '[{"a": 1}]'
+
+    def test_strips_plain_fence(self):
+        from app.services.diagnose._llm_client import strip_json_fences
+        raw = "```\nhello\n```"
+        assert "```" not in strip_json_fences(raw)
+
+
+class TestExtractFirstJsonArray:
+    def test_extracts_array_from_mixed_text(self):
+        from app.services.diagnose._llm_client import extract_first_json_array
+        raw = 'Here is the result:\n[{"id": 1}, {"id": 2}]\nThat is all.'
+        result = extract_first_json_array(raw)
+        import json
+        parsed = json.loads(result)
+        assert len(parsed) == 2
+
+    def test_returns_original_when_no_array(self):
+        from app.services.diagnose._llm_client import extract_first_json_array
+        raw = "no array here"
+        assert extract_first_json_array(raw) == raw
diff --git a/web/src/components/ChatDiagnose.vue b/web/src/components/ChatDiagnose.vue
index eb87110..7e7c0e8 100644
--- a/web/src/components/ChatDiagnose.vue
+++ b/web/src/components/ChatDiagnose.vue
@@ -137,6 +137,24 @@
       </div>
     </div>
 
+    <!-- Untracked name nudge -->
+    <div
+      v-if="untrackedNames.length && !activeTurn"
+      class="mb-3 p-3 rounded border border-yellow-700/40 bg-yellow-900/10"
+    >
+      <p class="text-xs text-yellow-400 mb-1">Not monitoring:
+        <span
+          v-for="name in untrackedNames"
+          :key="name"
+          class="font-mono ml-1 px-1.5 py-0.5 rounded bg-yellow-900/30 border border-yellow-700/30"
+        >{{ name }}</span>
+      </p>
+      <RouterLink
+        to="/sources"
+        class="text-xs text-accent hover:underline"
+      >Add as a log source →</RouterLink>
+    </div>
+
     <!-- Input row -->
     <div class="border-t border-surface-border pt-3">
       <div class="flex gap-2 items-end">
@@ -215,6 +233,7 @@ interface Turn {
 const turns            = ref<Turn[]>([])
 const draft            = ref('')
 const suggestedSources = ref<SuggestedSource[]>([])
+const untrackedNames   = ref<string[]>([])
 const excludedSources  = ref(new Set<string>())
 const activeTurn       = ref<Turn | null>(null)
 const scrollEl         = ref<HTMLElement | null>(null)
@@ -237,6 +256,7 @@ function onInput() {
     suggestTimer = setTimeout(fetchSuggestions, 400)
   } else {
     suggestedSources.value = []
+    untrackedNames.value = []
   }
 }
 
@@ -250,6 +270,7 @@ async function fetchSuggestions() {
     if (!res.ok) return
     const data = await res.json()
     suggestedSources.value = (data.suggested ?? []).slice(0, 6)
+    untrackedNames.value = data.untracked_names ?? []
     // Reset exclusions when suggestions change
     excludedSources.value = new Set()
   } catch { /* non-critical */ }