From 3c54a65dda4b8ec00dccd7ed24bfb13bf79e0190 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 14 Apr 2026 11:40:44 -0700 Subject: [PATCH] feat: SearchParamsResponse dataclass and JSON parser for LLM query builder --- app/llm/__init__.py | 5 ++ app/llm/query_translator.py | 90 ++++++++++++++++++++++++++++++++++ tests/test_query_translator.py | 72 +++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 app/llm/__init__.py create mode 100644 app/llm/query_translator.py create mode 100644 tests/test_query_translator.py diff --git a/app/llm/__init__.py b/app/llm/__init__.py new file mode 100644 index 0000000..b18a97e --- /dev/null +++ b/app/llm/__init__.py @@ -0,0 +1,5 @@ +# app/llm/__init__.py +# BSL 1.1 License +from .query_translator import QueryTranslator, QueryTranslatorError, SearchParamsResponse + +__all__ = ["QueryTranslator", "QueryTranslatorError", "SearchParamsResponse"] diff --git a/app/llm/query_translator.py b/app/llm/query_translator.py new file mode 100644 index 0000000..5ee8a83 --- /dev/null +++ b/app/llm/query_translator.py @@ -0,0 +1,90 @@ +# app/llm/query_translator.py +# BSL 1.1 License +"""LLM query builder — translates natural language to eBay SearchFilters. + +The QueryTranslator calls LLMRouter.complete() (synchronous) with a domain-aware +system prompt. The prompt includes category hints injected from EbayCategoryCache. +The LLM returns a single JSON object matching SearchParamsResponse. 
from __future__ import annotations

import json
import logging
import math
from dataclasses import dataclass
from typing import Optional

log = logging.getLogger(__name__)


class QueryTranslatorError(Exception):
    """Raised when the LLM output cannot be parsed into SearchParamsResponse.

    Attributes:
        raw: The LLM output that failed to parse ("" when not supplied), kept
            on the exception so callers can log or display it.
    """

    def __init__(self, message: str, raw: str = "") -> None:
        super().__init__(message)
        self.raw = raw


@dataclass(frozen=True)
class SearchParamsResponse:
    """Parsed LLM response — maps 1:1 to the /api/search query parameters."""

    base_query: str
    must_include_mode: str  # "all" | "any" | "groups"
    must_include: str  # raw filter string
    must_exclude: str  # comma-separated exclusion terms
    max_price: Optional[float]
    min_price: Optional[float]
    condition: list[str]  # subset of ["new", "used", "for_parts"]
    category_id: Optional[str]  # eBay category ID string, or None
    explanation: str  # one-sentence plain-language summary


_VALID_MODES = {"all", "any", "groups"}
_VALID_CONDITIONS = {"new", "used", "for_parts"}


def _text(value: object) -> str:
    """Coerce a JSON value to text, mapping JSON null to "" rather than "None"."""
    return "" if value is None else str(value)


def _price(value: object, field: str) -> Optional[float]:
    """Convert an optional JSON price to a finite float.

    Returns None for JSON null. Raises ValueError for values that overflow a
    float or are NaN/Infinity; float() itself raises TypeError/ValueError for
    values that are not numeric at all.
    """
    if value is None:
        return None
    try:
        price = float(value)
    except OverflowError as exc:
        # Huge JSON integers parse fine but do not fit in a float; report this
        # as an ordinary invalid-field error instead of leaking OverflowError.
        raise ValueError(f"{field} is out of range") from exc
    if not math.isfinite(price):
        # json.loads accepts the non-standard NaN / Infinity literals.
        raise ValueError(f"{field} must be a finite number")
    return price


def _conditions(value: object) -> list[str]:
    """Return the recognised condition names found in *value*, in order.

    JSON null means "no condition filter"; a bare string is treated as a
    one-element list so it is not iterated character by character. Unknown or
    non-string entries are dropped.
    """
    if value is None:
        return []
    if isinstance(value, str):
        value = [value]
    return [c for c in value if isinstance(c, str) and c in _VALID_CONDITIONS]


def _parse_response(raw: str) -> SearchParamsResponse:
    """Parse the LLM's raw text output into a SearchParamsResponse.

    Args:
        raw: Text returned by the LLM; expected to be a single JSON object.

    Returns:
        The validated search parameters. An unrecognised must_include_mode
        falls back to "all", unknown condition values are dropped, and JSON
        null in an optional text field becomes "".

    Raises:
        QueryTranslatorError: If the text is not valid JSON, is not a JSON
            object, lacks a non-empty base_query, or carries a price that is
            not a finite number. The offending text is available as ``.raw``.
    """
    try:
        data = json.loads(raw.strip())
    except json.JSONDecodeError as exc:
        raise QueryTranslatorError(f"LLM returned unparseable JSON: {exc}", raw=raw) from exc

    if not isinstance(data, dict):
        raise QueryTranslatorError(
            f"LLM response is not a JSON object (got {type(data).__name__})", raw=raw
        )

    try:
        base_query = _text(data["base_query"]).strip()
        if not base_query:
            raise ValueError("base_query is empty")
        must_include_mode = _text(data.get("must_include_mode", "all"))
        if must_include_mode not in _VALID_MODES:
            must_include_mode = "all"
        must_include = _text(data.get("must_include", ""))
        must_exclude = _text(data.get("must_exclude", ""))
        max_price = _price(data.get("max_price"), "max_price")
        min_price = _price(data.get("min_price"), "min_price")
        condition = _conditions(data.get("condition", []))
        category_id = str(data["category_id"]) if data.get("category_id") else None
        explanation = _text(data.get("explanation", "")).strip()
    except (KeyError, TypeError, ValueError) as exc:
        raise QueryTranslatorError(
            f"LLM response missing or invalid field: {exc}", raw=raw
        ) from exc

    return SearchParamsResponse(
        base_query=base_query,
        must_include_mode=must_include_mode,
        must_include=must_include,
        must_exclude=must_exclude,
        max_price=max_price,
        min_price=min_price,
        condition=condition,
        category_id=category_id,
        explanation=explanation,
    )


class QueryTranslator:
    """Stub — implemented in Task 6."""
    pass
# ── _parse_response tests ────────────────────────────────────────────────────


def test_parse_response_happy_path():
    """A fully populated payload is carried over field by field."""
    payload = {
        "base_query": "RTX 3080",
        "must_include_mode": "groups",
        "must_include": "rtx|geforce, 3080",
        "must_exclude": "mining,for parts",
        "max_price": 300.0,
        "min_price": None,
        "condition": ["used"],
        "category_id": "27386",
        "explanation": "Searching for used RTX 3080 GPUs under $300.",
    }

    parsed = _parse_response(json.dumps(payload))

    assert parsed.base_query == "RTX 3080"
    assert parsed.must_include_mode == "groups"
    assert parsed.max_price == 300.0
    assert parsed.min_price is None
    assert parsed.condition == ["used"]
    assert parsed.category_id == "27386"
    assert "RTX 3080" in parsed.explanation


def test_parse_response_missing_optional_fields():
    """Null/empty optional fields come back as None or an empty list."""
    payload = {
        "base_query": "vintage camera",
        "must_include_mode": "all",
        "must_include": "",
        "must_exclude": "",
        "max_price": None,
        "min_price": None,
        "condition": [],
        "category_id": None,
        "explanation": "Searching for vintage cameras.",
    }

    parsed = _parse_response(json.dumps(payload))

    assert parsed.category_id is None
    assert parsed.max_price is None
    assert parsed.condition == []


def test_parse_response_invalid_json():
    """Text that is not JSON is reported as unparseable."""
    garbage = "this is not json {{{ garbage"
    with pytest.raises(QueryTranslatorError, match="unparseable"):
        _parse_response(garbage)


def test_parse_response_missing_required_field():
    """base_query is the one required key; omitting it must raise."""
    payload_without_query = {
        "must_include_mode": "all",
        "must_include": "",
        "must_exclude": "",
        "max_price": None,
        "min_price": None,
        "condition": [],
        "category_id": None,
        "explanation": "oops",
    }
    serialized = json.dumps(payload_without_query)
    with pytest.raises(QueryTranslatorError):
        _parse_response(serialized)