feat: SearchParamsResponse dataclass and JSON parser for LLM query builder

2026-04-14 11:40:44 -07:00 · 2026-04-14 11:40:44 -07:00 · 3c54a65dda
commit 3c54a65dda
parent 15718ab431
3 changed files with 167 additions and 0 deletions
--- a/app/llm/__init__.py
+++ b/app/llm/__init__.py
@ -0,0 +1,5 @@
+# app/llm/__init__.py
+# BSL 1.1 License
+from .query_translator import QueryTranslator, QueryTranslatorError, SearchParamsResponse
+
+# Names re-exported from .query_translator as this package's public API
+# (the set exposed by a star-import of the package).
+__all__ = ["QueryTranslator", "QueryTranslatorError", "SearchParamsResponse"]
--- a/app/llm/query_translator.py
+++ b/app/llm/query_translator.py
@ -0,0 +1,90 @@
+# app/llm/query_translator.py
+# BSL 1.1 License
+"""LLM query builder — translates natural language to eBay SearchFilters.
+
+The QueryTranslator calls LLMRouter.complete() (synchronous) with a domain-aware
+system prompt. The prompt includes category hints injected from EbayCategoryCache.
+The LLM returns a single JSON object matching SearchParamsResponse.
+"""
+from __future__ import annotations
+
+import json
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+log = logging.getLogger(__name__)
+
+
+class QueryTranslatorError(Exception):
+    """Signals that LLM output could not be parsed into SearchParamsResponse.
+
+    Attributes:
+        raw: The LLM text that failed to parse; empty string when not supplied.
+    """
+
+    def __init__(self, message: str, raw: str = "") -> None:
+        # Keep the offending text alongside the human-readable message.
+        self.raw = raw
+        super().__init__(message)
+
+
+@dataclass(frozen=True)
+class SearchParamsResponse:
+    """Immutable parsed LLM response; maps 1:1 to the /api/search query parameters.
+
+    Attributes:
+        base_query: Base search query text.
+        must_include_mode: One of "all", "any" or "groups".
+        must_include: Raw filter string.
+        must_exclude: Comma-separated exclusion terms.
+        max_price: Upper price bound, or None when not set.
+        min_price: Lower price bound, or None when not set.
+        condition: Subset of ["new", "used", "for_parts"].
+        category_id: eBay category ID string, or None.
+        explanation: One-sentence plain-language summary.
+    """
+
+    base_query: str
+    must_include_mode: str
+    must_include: str
+    must_exclude: str
+    max_price: Optional[float]
+    min_price: Optional[float]
+    condition: list[str]
+    category_id: Optional[str]
+    explanation: str
+
+
+# Accepted values for SearchParamsResponse.must_include_mode; _parse_response
+# substitutes "all" for any value outside this set.
+_VALID_MODES = {"all", "any", "groups"}
+# Accepted entries for SearchParamsResponse.condition; _parse_response drops
+# entries that are not in this set.
+_VALID_CONDITIONS = {"new", "used", "for_parts"}
+
+
+def _text_field(data: dict, key: str) -> str:
+    """Return ``data[key]`` as text, mapping a missing key or JSON null to ""."""
+    value = data.get(key)
+    # str(None) would yield the literal "None", which is never a real value here.
+    return "" if value is None else str(value)
+
+
+def _price_field(data: dict, key: str) -> Optional[float]:
+    """Return ``data[key]`` as a finite float, or None when missing or null."""
+    value = data.get(key)
+    if value is None:
+        return None
+    # bool is an int subclass: float(True) == 1.0 would pass for a price.
+    if isinstance(value, bool):
+        raise TypeError(f"{key} must be a number, not a boolean")
+    number = float(value)
+    # json.loads accepts NaN/Infinity; neither is a usable price bound.
+    # (Comparisons with NaN are always False, so this rejects NaN as well.)
+    if not (float("-inf") < number < float("inf")):
+        raise ValueError(f"{key} must be a finite number")
+    return number
+
+
+def _parse_response(raw: str) -> SearchParamsResponse:
+    """Parse the LLM's raw text output into a SearchParamsResponse.
+
+    The text must decode to a single JSON object. Only ``base_query`` is
+    required; every other key falls back to a neutral default when it is
+    absent or JSON ``null``. An unrecognised ``must_include_mode`` becomes
+    "all", and unrecognised ``condition`` entries are dropped.
+
+    Args:
+        raw: Text returned by the LLM; surrounding whitespace is ignored.
+
+    Returns:
+        The SearchParamsResponse built from the decoded object.
+
+    Raises:
+        QueryTranslatorError: If the text is not valid JSON, is not a JSON
+            object, has a missing/empty/null ``base_query``, or carries a
+            value that cannot be coerced (e.g. a non-numeric or non-finite
+            price). The offending text is kept on the exception's ``raw``.
+    """
+    try:
+        data = json.loads(raw.strip())
+    except json.JSONDecodeError as exc:
+        raise QueryTranslatorError(f"LLM returned unparseable JSON: {exc}", raw=raw) from exc
+
+    # Valid JSON can still be a list, string or number; fail with a clear
+    # message instead of an incidental indexing error further down.
+    if not isinstance(data, dict):
+        raise QueryTranslatorError(
+            f"LLM response is not a JSON object (got {type(data).__name__})", raw=raw
+        )
+
+    try:
+        base_query_value = data["base_query"]  # KeyError when the key is absent
+        base_query = "" if base_query_value is None else str(base_query_value).strip()
+        if not base_query:
+            raise KeyError("base_query is empty")
+
+        must_include_mode = _text_field(data, "must_include_mode")
+        if must_include_mode not in _VALID_MODES:
+            must_include_mode = "all"
+
+        must_include = _text_field(data, "must_include")
+        must_exclude = _text_field(data, "must_exclude")
+        max_price = _price_field(data, "max_price")
+        min_price = _price_field(data, "min_price")
+
+        raw_conditions = data.get("condition")
+        if raw_conditions is None:
+            raw_conditions = []
+        elif isinstance(raw_conditions, str):
+            # A bare string would otherwise be iterated character by character.
+            raw_conditions = [raw_conditions]
+        condition = [
+            c for c in raw_conditions
+            if isinstance(c, str) and c in _VALID_CONDITIONS
+        ]
+
+        category_value = data.get("category_id")
+        category_id = str(category_value) if category_value else None
+        explanation = _text_field(data, "explanation").strip()
+    except (KeyError, TypeError, ValueError) as exc:
+        raise QueryTranslatorError(
+            f"LLM response missing or invalid field: {exc}", raw=raw
+        ) from exc
+
+    return SearchParamsResponse(
+        base_query=base_query,
+        must_include_mode=must_include_mode,
+        must_include=must_include,
+        must_exclude=must_exclude,
+        max_price=max_price,
+        min_price=min_price,
+        condition=condition,
+        category_id=category_id,
+        explanation=explanation,
+    )
+
+
+class QueryTranslator:
+    """Placeholder class; it defines no behaviour yet.
+
+    Stub — implemented in Task 6.
+    """
--- a/tests/test_query_translator.py
+++ b/tests/test_query_translator.py
@ -0,0 +1,72 @@
+"""Unit tests for QueryTranslator — LLMRouter mocked at boundary."""
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from app.llm.query_translator import QueryTranslatorError, SearchParamsResponse, _parse_response
+
+
+# ── _parse_response ───────────────────────────────────────────────────────────
+
+def test_parse_response_happy_path():
+    """A fully populated LLM reply is mapped field-for-field onto the result."""
+    raw = json.dumps({
+        "base_query": "RTX 3080",
+        "must_include_mode": "groups",
+        "must_include": "rtx|geforce, 3080",
+        "must_exclude": "mining,for parts",
+        "max_price": 300.0,
+        "min_price": None,
+        "condition": ["used"],
+        "category_id": "27386",
+        "explanation": "Searching for used RTX 3080 GPUs under $300.",
+    })
+    result = _parse_response(raw)
+    assert result.base_query == "RTX 3080"
+    assert result.must_include_mode == "groups"
+    # The filter strings must come through the parser unchanged.
+    assert result.must_include == "rtx|geforce, 3080"
+    assert result.must_exclude == "mining,for parts"
+    assert result.max_price == 300.0
+    assert result.min_price is None
+    assert result.condition == ["used"]
+    assert result.category_id == "27386"
+    assert "RTX 3080" in result.explanation
+
+
+def test_parse_response_missing_optional_fields():
+    """Optional fields may be sent as null/empty or left out of the reply entirely."""
+    explicit_nulls = {
+        "base_query": "vintage camera",
+        "must_include_mode": "all",
+        "must_include": "",
+        "must_exclude": "",
+        "max_price": None,
+        "min_price": None,
+        "condition": [],
+        "category_id": None,
+        "explanation": "Searching for vintage cameras.",
+    }
+    # Only the required key is present; every optional key is truly missing.
+    keys_omitted = {"base_query": "vintage camera"}
+
+    for payload in (explicit_nulls, keys_omitted):
+        result = _parse_response(json.dumps(payload))
+        assert result.base_query == "vintage camera"
+        assert result.category_id is None
+        assert result.max_price is None
+        assert result.min_price is None
+        assert result.condition == []
+
+    # Defaults applied when the keys are absent.
+    defaults = _parse_response(json.dumps(keys_omitted))
+    assert defaults.must_include_mode == "all"
+    assert defaults.must_include == ""
+    assert defaults.must_exclude == ""
+    assert defaults.explanation == ""
+
+
+def test_parse_response_invalid_json():
+    """Text that is not JSON is rejected with the "unparseable" error."""
+    garbage = "this is not json {{{ garbage"
+    with pytest.raises(QueryTranslatorError, match="unparseable"):
+        _parse_response(garbage)
+
+
+def test_parse_response_missing_required_field():
+    """A reply without the required ``base_query`` key raises QueryTranslatorError."""
+    payload_without_query = {
+        "must_include_mode": "all",
+        "must_include": "",
+        "must_exclude": "",
+        "max_price": None,
+        "min_price": None,
+        "condition": [],
+        "category_id": None,
+        "explanation": "oops",
+    }
+    incomplete = json.dumps(payload_without_query)
+    with pytest.raises(QueryTranslatorError):
+        _parse_response(incomplete)