From 4c1940d12ef136f03aa7024a20b19080f9ebdca3 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 16 Jun 2026 09:42:44 -0700 Subject: [PATCH] fix: strip reasoning-model thinking tags; surface untracked node names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - app/services/diagnose/_llm_client.py: strip <think>…</think> blocks (case-insensitive, multiline) from LLM response content before it reaches the UI or any JSON parser — affects DeepSeek-R1, Qwen QwQ, and any other model that emits chain-of-thought in content - app/rest.py: suggest_sources now also returns untracked_names — query tokens that look like hostnames/service names but don't appear in any monitored source, so the UI can prompt the user to add them - web/src/components/ChatDiagnose.vue: show amber "Not monitoring: X" banner with "Add as a log source →" link when untracked_names present - tests/test_llm_client.py: 13 tests covering think-strip edge cases (single/multi-line, multiple blocks, case-insensitive, only-thinking) plus existing extract_content and JSON-fence helpers --- app/rest.py | 24 ++++++-- app/services/diagnose/_llm_client.py | 16 ++++- tests/test_llm_client.py | 87 ++++++++++++++++++++++++++++ web/src/components/ChatDiagnose.vue | 21 +++++++ 4 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 tests/test_llm_client.py diff --git a/app/rest.py b/app/rest.py index 35c5e2d..ad9ddda 100644 --- a/app/rest.py +++ b/app/rest.py @@ -555,24 +555,35 @@ _SUGGEST_STOPWORDS = frozenset({ @router.post("/api/sources/suggest") def suggest_sources(body: SourceSuggestRequest) -> dict: - """Return source IDs ranked by relevance to a natural-language problem description.""" + """Return source IDs ranked by relevance to a natural-language problem description. + + Also returns ``untracked_names`` — query tokens that look like hostnames or + service names but do not appear in any monitored source, so the UI can + prompt the user to add them. 
+ """ all_sources = _list_sources(DB_PATH) query_tokens = { t.lower() - for t in re.findall(r"[a-zA-Z]+", body.query) + for t in re.findall(r"[a-zA-Z][a-zA-Z0-9_-]*", body.query) if len(t) > 2 and t.lower() not in _SUGGEST_STOPWORDS } - suggestions = [] + # Build a flat set of every token present in any source ID + all_source_tokens: set[str] = set() + source_token_map: dict[str, set[str]] = {} for src in all_sources: src_id: str = src["source_id"] - # Tokenise source ID: split on colon, dash, underscore, digits parts = { p.lower() for seg in re.split(r"[:\-_\d]+", src_id) for p in [seg.strip()] if len(p) > 2 } + source_token_map[src_id] = parts + all_source_tokens |= parts + + suggestions = [] + for src_id, parts in source_token_map.items(): matched = query_tokens & parts if matched: score = round(len(matched) / max(len(parts), 1), 3) @@ -583,8 +594,13 @@ def suggest_sources(body: SourceSuggestRequest) -> dict: }) suggestions.sort(key=lambda x: x["score"], reverse=True) + + # Tokens that look like host/service names but aren't in any source + untracked = sorted(query_tokens - all_source_tokens) + return { "suggested": suggestions, + "untracked_names": untracked, "all_source_ids": [s["source_id"] for s in all_sources], } diff --git a/app/services/diagnose/_llm_client.py b/app/services/diagnose/_llm_client.py index 87e695d..bb3e212 100644 --- a/app/services/diagnose/_llm_client.py +++ b/app/services/diagnose/_llm_client.py @@ -23,16 +23,30 @@ _JSON_FENCE_RE = re.compile( re.MULTILINE, ) +# Reasoning models (DeepSeek-R1, Qwen QwQ, Llama thinking variants) embed +# chain-of-thought inside <think>…</think> tags in the content field. +# Strip them so only the final response reaches the UI. 
+_THINK_TAG_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE) + + +def _strip_thinking(text: str) -> str: + """Remove <think>…</think> blocks and trim surrounding whitespace.""" + return _THINK_TAG_RE.sub("", text).strip() + def extract_content(resp_json: dict) -> str | None: """Pull text content from an OpenAI-compat chat completion response. + Strips reasoning-model thinking tags before returning. Returns None when the response has no choices or empty content. """ choices = resp_json.get("choices") or [] if not choices: return None - return (choices[0].get("message", {}).get("content") or "").strip() or None + raw = (choices[0].get("message", {}).get("content") or "").strip() + if not raw: + return None + return _strip_thinking(raw) or None def strip_json_fences(raw: str) -> str: diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py new file mode 100644 index 0000000..4b13b31 --- /dev/null +++ b/tests/test_llm_client.py @@ -0,0 +1,87 @@ +"""Tests for diagnose/_llm_client.py — thinking-tag stripping and content extraction.""" +from __future__ import annotations + +import pytest + + +def _resp(content: str | None) -> dict: + if content is None: + return {"choices": []} + return {"choices": [{"message": {"content": content}}]} + + +class TestExtractContent: + def test_returns_plain_content(self): + from app.services.diagnose._llm_client import extract_content + assert extract_content(_resp("hello world")) == "hello world" + + def test_returns_none_on_empty_choices(self): + from app.services.diagnose._llm_client import extract_content + assert extract_content({"choices": []}) is None + + def test_returns_none_on_empty_content(self): + from app.services.diagnose._llm_client import extract_content + assert extract_content(_resp("")) is None + + def test_strips_single_think_block(self): + from app.services.diagnose._llm_client import extract_content + raw = "<think>Let me reason about this…</think>\nThe answer is 42." + assert extract_content(_resp(raw)) == "The answer is 42." 
+ + def test_strips_multi_line_think_block(self): + from app.services.diagnose._llm_client import extract_content + raw = "<think>\nStep 1: consider X\nStep 2: consider Y\n</think>\n\nFinal answer here." + result = extract_content(_resp(raw)) + assert result == "Final answer here." + assert "<think>" not in result + + def test_strips_multiple_think_blocks(self): + from app.services.diagnose._llm_client import extract_content + raw = "<think>first</think> actual <think>second</think> content" + result = extract_content(_resp(raw)) + assert "<think>" not in result + assert "actual" in result + assert "content" in result + + def test_strips_case_insensitive(self): + from app.services.diagnose._llm_client import extract_content + raw = "<THINK>hidden</THINK> visible" + result = extract_content(_resp(raw)) + assert result == "visible" + + def test_returns_none_when_only_thinking_remains(self): + from app.services.diagnose._llm_client import extract_content + raw = "<think>only thinking, no output</think>" + assert extract_content(_resp(raw)) is None + + def test_content_without_thinking_unchanged(self): + from app.services.diagnose._llm_client import extract_content + raw = "Redis OOM at 03:00 — key eviction triggered by batch job." + assert extract_content(_resp(raw)) == raw + + +class TestStripJsonFences: + def test_strips_json_fence(self): + from app.services.diagnose._llm_client import strip_json_fences + raw = "```json\n[{\"a\": 1}]\n```" + assert strip_json_fences(raw) == '[{"a": 1}]' + + def test_strips_plain_fence(self): + from app.services.diagnose._llm_client import strip_json_fences + raw = "```\nhello\n```" + assert "```" not in strip_json_fences(raw) + + +class TestExtractFirstJsonArray: + def test_extracts_array_from_mixed_text(self): + from app.services.diagnose._llm_client import extract_first_json_array + raw = 'Here is the result:\n[{"id": 1}, {"id": 2}]\nThat is all.' 
+ result = extract_first_json_array(raw) + import json + parsed = json.loads(result) + assert len(parsed) == 2 + + def test_returns_original_when_no_array(self): + from app.services.diagnose._llm_client import extract_first_json_array + raw = "no array here" + assert extract_first_json_array(raw) == raw diff --git a/web/src/components/ChatDiagnose.vue b/web/src/components/ChatDiagnose.vue index eb87110..7e7c0e8 100644 --- a/web/src/components/ChatDiagnose.vue +++ b/web/src/components/ChatDiagnose.vue @@ -137,6 +137,24 @@ + +
+

Not monitoring: + {{ name }} +

+ Add as a log source → +
+
@@ -215,6 +233,7 @@ interface Turn { const turns = ref([]) const draft = ref('') const suggestedSources = ref([]) +const untrackedNames = ref([]) const excludedSources = ref(new Set()) const activeTurn = ref(null) const scrollEl = ref(null) @@ -237,6 +256,7 @@ function onInput() { suggestTimer = setTimeout(fetchSuggestions, 400) } else { suggestedSources.value = [] + untrackedNames.value = [] } } @@ -250,6 +270,7 @@ async function fetchSuggestions() { if (!res.ok) return const data = await res.json() suggestedSources.value = (data.suggested ?? []).slice(0, 6) + untrackedNames.value = data.untracked_names ?? [] // Reset exclusions when suggestions change excludedSources.value = new Set() } catch { /* non-critical */ }