feat: SearchParamsResponse dataclass and JSON parser for LLM query builder

This commit is contained in:
pyr0ball 2026-04-14 11:40:44 -07:00
parent 15718ab431
commit 3c54a65dda
3 changed files with 167 additions and 0 deletions

5
app/llm/__init__.py Normal file
View file

@ -0,0 +1,5 @@
# app/llm/__init__.py
# BSL 1.1 License
from .query_translator import QueryTranslator, QueryTranslatorError, SearchParamsResponse
__all__ = ["QueryTranslator", "QueryTranslatorError", "SearchParamsResponse"]

View file

@ -0,0 +1,90 @@
# app/llm/query_translator.py
# BSL 1.1 License
"""LLM query builder — translates natural language to eBay SearchFilters.
The QueryTranslator calls LLMRouter.complete() (synchronous) with a domain-aware
system prompt. The prompt includes category hints injected from EbayCategoryCache.
The LLM returns a single JSON object matching SearchParamsResponse.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from typing import Optional
log = logging.getLogger(__name__)
class QueryTranslatorError(Exception):
    """Signal that LLM output could not be turned into a SearchParamsResponse.

    Attributes:
        raw: The text handed to the constructor alongside the message, kept
            so callers can inspect or log what failed to parse. Defaults to
            an empty string.
    """

    def __init__(self, message: str, raw: str = "") -> None:
        # Store the payload first; the base class only receives the message.
        self.raw = raw
        super().__init__(message)
@dataclass(frozen=True)
class SearchParamsResponse:
    """Frozen record of a parsed LLM response.

    Fields map 1:1 to the /api/search query parameters.

    Attributes:
        base_query: The search query text.
        must_include_mode: One of "all", "any" or "groups".
        must_include: Raw filter string.
        must_exclude: Comma-separated exclusion terms.
        max_price: Upper price bound, or None when unset.
        min_price: Lower price bound, or None when unset.
        condition: Subset of ["new", "used", "for_parts"].
        category_id: eBay category ID string, or None.
        explanation: One-sentence plain-language summary.
    """

    base_query: str
    must_include_mode: str
    must_include: str
    must_exclude: str
    max_price: Optional[float]
    min_price: Optional[float]
    condition: list[str]
    category_id: Optional[str]
    explanation: str
# Closed vocabularies for the corresponding response fields. Values outside
# them are normalised or dropped by _parse_response rather than rejected.
_VALID_MODES = frozenset({"all", "any", "groups"})
_VALID_CONDITIONS = frozenset({"new", "used", "for_parts"})


def _text_or_empty(value: object) -> str:
    """Return ``str(value)``, mapping JSON ``null`` to ``""`` instead of "None"."""
    return "" if value is None else str(value)


def _parse_conditions(value: object) -> list[str]:
    """Return the recognised condition names found in *value*, in order."""
    if value is None:
        return []
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character,
        # silently discarding a perfectly valid single condition.
        value = [value]
    normalised = (item.strip().lower() for item in value if isinstance(item, str))
    return [item for item in normalised if item in _VALID_CONDITIONS]


def _parse_response(raw: str) -> SearchParamsResponse:
    """Parse the LLM's raw text output into a SearchParamsResponse.

    Only ``base_query`` is required. Every other key may be absent or JSON
    ``null``, in which case it falls back to its empty value ("" for text,
    None for prices and category, [] for conditions, "all" for the mode).
    An unrecognised ``must_include_mode`` also falls back to "all", and
    unrecognised condition names are dropped. Mode and condition values are
    matched case-insensitively after trimming whitespace.

    Args:
        raw: Text returned by the LLM, expected to be a single JSON object.

    Returns:
        The populated SearchParamsResponse.

    Raises:
        QueryTranslatorError: If the text is not valid JSON, is not a JSON
            object, lacks a non-empty ``base_query``, or carries a value that
            cannot be converted (for example a non-numeric price). The
            original text is attached as ``raw``.
    """
    try:
        data = json.loads(raw.strip())
    except json.JSONDecodeError as exc:
        raise QueryTranslatorError(f"LLM returned unparseable JSON: {exc}", raw=raw) from exc

    # Valid JSON that is not an object (list, string, number, null) cannot
    # carry named fields; say so instead of surfacing an indexing TypeError.
    if not isinstance(data, dict):
        raise QueryTranslatorError(
            "LLM response missing or invalid field: expected a JSON object, "
            f"got {type(data).__name__}",
            raw=raw,
        )

    try:
        base_query = _text_or_empty(data["base_query"]).strip()
        if not base_query:
            # ValueError rather than KeyError: str(KeyError) wraps the text
            # in quotes, which garbles the final message.
            raise ValueError("base_query is empty")

        must_include_mode = _text_or_empty(data.get("must_include_mode")).strip().lower()
        if must_include_mode not in _VALID_MODES:
            must_include_mode = "all"

        must_include = _text_or_empty(data.get("must_include"))
        must_exclude = _text_or_empty(data.get("must_exclude"))

        raw_max = data.get("max_price")
        max_price = None if raw_max is None else float(raw_max)
        raw_min = data.get("min_price")
        min_price = None if raw_min is None else float(raw_min)

        condition = _parse_conditions(data.get("condition"))

        raw_category = data.get("category_id")
        # Falsy values (null, "", 0) mean "no category"; so does blank text.
        category_id = (str(raw_category).strip() or None) if raw_category else None

        explanation = _text_or_empty(data.get("explanation")).strip()
    except (KeyError, TypeError, ValueError) as exc:
        raise QueryTranslatorError(
            f"LLM response missing or invalid field: {exc}", raw=raw
        ) from exc

    return SearchParamsResponse(
        base_query=base_query,
        must_include_mode=must_include_mode,
        must_include=must_include,
        must_exclude=must_exclude,
        max_price=max_price,
        min_price=min_price,
        condition=condition,
        category_id=category_id,
        explanation=explanation,
    )
class QueryTranslator:
    """Stub with no behaviour yet — to be implemented in Task 6."""

View file

@ -0,0 +1,72 @@
"""Unit tests for QueryTranslator — LLMRouter mocked at boundary."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import pytest
from app.llm.query_translator import QueryTranslatorError, SearchParamsResponse, _parse_response
# ── _parse_response ───────────────────────────────────────────────────────────
def test_parse_response_happy_path():
    """A fully populated, valid payload is parsed into matching fields."""
    payload = {
        "base_query": "RTX 3080",
        "must_include_mode": "groups",
        "must_include": "rtx|geforce, 3080",
        "must_exclude": "mining,for parts",
        "max_price": 300.0,
        "min_price": None,
        "condition": ["used"],
        "category_id": "27386",
        "explanation": "Searching for used RTX 3080 GPUs under $300.",
    }

    parsed = _parse_response(json.dumps(payload))

    assert parsed.base_query == "RTX 3080"
    assert parsed.must_include_mode == "groups"
    assert parsed.max_price == 300.0
    assert parsed.min_price is None
    assert parsed.condition == ["used"]
    assert parsed.category_id == "27386"
    assert "RTX 3080" in parsed.explanation
def test_parse_response_missing_optional_fields():
    """Optional fields may be null/empty or absent without failing the parse."""
    # Case 1: optional keys present but carrying null / empty values.
    raw = json.dumps({
        "base_query": "vintage camera",
        "must_include_mode": "all",
        "must_include": "",
        "must_exclude": "",
        "max_price": None,
        "min_price": None,
        "condition": [],
        "category_id": None,
        "explanation": "Searching for vintage cameras.",
    })
    result = _parse_response(raw)
    assert result.category_id is None
    assert result.max_price is None
    assert result.condition == []

    # Case 2: optional keys genuinely missing — only base_query is supplied,
    # so every other field must fall back to its default.
    sparse = _parse_response(json.dumps({"base_query": "vintage camera"}))
    assert sparse.base_query == "vintage camera"
    assert sparse.must_include_mode == "all"
    assert sparse.must_include == ""
    assert sparse.must_exclude == ""
    assert sparse.max_price is None
    assert sparse.min_price is None
    assert sparse.condition == []
    assert sparse.category_id is None
    assert sparse.explanation == ""
def test_parse_response_invalid_json():
    """Text that is not JSON is rejected with an "unparseable" error."""
    garbage = "this is not json {{{ garbage"
    with pytest.raises(QueryTranslatorError, match="unparseable"):
        _parse_response(garbage)
def test_parse_response_missing_required_field():
    """A payload lacking the required base_query key must be rejected."""
    payload_without_query = {
        "must_include_mode": "all",
        "must_include": "",
        "must_exclude": "",
        "max_price": None,
        "min_price": None,
        "condition": [],
        "category_id": None,
        "explanation": "oops",
    }
    serialized = json.dumps(payload_without_query)

    with pytest.raises(QueryTranslatorError):
        _parse_response(serialized)