Compare commits
5 commits
6e954c5c6e
...
4ac24e7920
| Author | SHA1 | Date | |
|---|---|---|---|
| 4ac24e7920 | |||
| cdbc24240a | |||
| dd39418bc8 | |||
| 02abc8e734 | |||
| 61c428baf0 |
9 changed files with 680 additions and 65 deletions
|
|
@ -2,17 +2,20 @@
|
||||||
# BSL 1.1 — LLM feature
|
# BSL 1.1 — LLM feature
|
||||||
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
||||||
|
|
||||||
Cloud (CF_ORCH_URL set):
|
Cloud (CF_ORCH_URL set), tier 1 — task-based routing (preferred):
|
||||||
Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
|
Calls /api/inference/task with product=kiwi, task=meal_plan.
|
||||||
Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
|
The coordinator resolves the model from assignments.yaml.
|
||||||
with a .complete(system, user, **kwargs) interface.
|
|
||||||
|
Cloud (CF_ORCH_URL set), tier 2 — direct allocation (fallback):
|
||||||
|
Allocates cf-text directly via client.allocate(). Used when the task
|
||||||
|
is not yet registered in the coordinator (cf-orch#61 not deployed).
|
||||||
|
|
||||||
Local / self-hosted (no CF_ORCH_URL):
|
Local / self-hosted (no CF_ORCH_URL):
|
||||||
Returns an LLMRouter instance which tries ollama, vllm, or any
|
Returns an LLMRouter instance which tries ollama, vllm, or any
|
||||||
backend configured in ~/.config/circuitforge/llm.yaml.
|
backend configured in ~/.config/circuitforge/llm.yaml.
|
||||||
|
|
||||||
Both paths expose the same interface so llm_timing.py and llm_planner.py
|
All paths expose the same (router, ctx) interface so llm_planner.py
|
||||||
need no knowledge of the backend.
|
needs no knowledge of the backend.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@ -22,8 +25,7 @@ from contextlib import nullcontext
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# cf-orch service name and VRAM budget for meal plan LLM tasks.
|
# cf-orch service name and TTL for direct-allocate fallback path.
|
||||||
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
|
|
||||||
_SERVICE_TYPE = "cf-text"
|
_SERVICE_TYPE = "cf-text"
|
||||||
_TTL_S = 120.0
|
_TTL_S = 120.0
|
||||||
_CALLER = "kiwi-meal-plan"
|
_CALLER = "kiwi-meal-plan"
|
||||||
|
|
@ -62,16 +64,58 @@ class _OrchTextRouter:
|
||||||
return resp.choices[0].message.content or ""
|
return resp.choices[0].message.content or ""
|
||||||
|
|
||||||
|
|
||||||
|
# Imported at module level so tests can patch the names in this module's namespace.
|
||||||
|
# app.services.task_inference.task_allocate — patch target for task routing tests.
|
||||||
|
try:
|
||||||
|
from app.services.task_inference import TaskNotRegistered, task_allocate
|
||||||
|
_HAS_TASK_INFERENCE = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_TASK_INFERENCE = False
|
||||||
|
|
||||||
|
# circuitforge_orch.client.CFOrchClient — patch target for direct-allocate fallback tests.
|
||||||
|
try:
|
||||||
|
from circuitforge_orch.client import CFOrchClient
|
||||||
|
except ImportError:
|
||||||
|
CFOrchClient = None # type: ignore[assignment,misc]
|
||||||
|
|
||||||
|
# circuitforge_core.llm.router.LLMRouter — patch target for local-inference tests.
|
||||||
|
try:
|
||||||
|
from circuitforge_core.llm.router import LLMRouter
|
||||||
|
except (ImportError, FileNotFoundError):
|
||||||
|
LLMRouter = None # type: ignore[assignment,misc]
|
||||||
|
|
||||||
|
|
||||||
def get_meal_plan_router():
|
def get_meal_plan_router():
|
||||||
"""Return an LLM client for meal plan tasks.
|
"""Return an LLM client for meal plan tasks.
|
||||||
|
|
||||||
Tries cf-orch cf-text allocation first (cloud); falls back to LLMRouter
|
Returns (router, ctx) where ctx is a context manager the caller holds
|
||||||
(local ollama/vllm). Returns None if no backend is available.
|
open for the duration of the LLM call. Returns (None, nullcontext(None))
|
||||||
|
if no backend is available.
|
||||||
"""
|
"""
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
||||||
|
|
||||||
if cf_orch_url:
|
if cf_orch_url:
|
||||||
|
# Tier 1: task-based routing — coordinator owns model selection.
|
||||||
|
if _HAS_TASK_INFERENCE:
|
||||||
|
try:
|
||||||
|
ctx = task_allocate(
|
||||||
|
"kiwi", "meal_plan",
|
||||||
|
service_hint=_SERVICE_TYPE,
|
||||||
|
ttl_s=_TTL_S,
|
||||||
|
)
|
||||||
|
alloc = ctx.__enter__()
|
||||||
|
return _OrchTextRouter(alloc.url), ctx
|
||||||
|
except TaskNotRegistered:
|
||||||
|
logger.debug(
|
||||||
|
"kiwi.meal_plan not in coordinator assignments — "
|
||||||
|
"falling back to direct cf-text allocation"
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("task allocation failed, trying direct allocate: %s", exc)
|
||||||
|
|
||||||
|
# Tier 2: direct allocation — hardcoded service type.
|
||||||
|
if CFOrchClient is not None:
|
||||||
try:
|
try:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
|
||||||
client = CFOrchClient(cf_orch_url)
|
client = CFOrchClient(cf_orch_url)
|
||||||
ctx = client.allocate(
|
ctx = client.allocate(
|
||||||
service=_SERVICE_TYPE,
|
service=_SERVICE_TYPE,
|
||||||
|
|
@ -81,12 +125,13 @@ def get_meal_plan_router():
|
||||||
alloc = ctx.__enter__()
|
alloc = ctx.__enter__()
|
||||||
if alloc is not None:
|
if alloc is not None:
|
||||||
return _OrchTextRouter(alloc.url), ctx
|
return _OrchTextRouter(alloc.url), ctx
|
||||||
|
ctx.__exit__(None, None, None) # release allocation before falling through
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
|
logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
|
||||||
|
|
||||||
# Local fallback: LLMRouter (ollama / vllm / openai-compat)
|
# Tier 3: local inference — ollama / vllm / openai-compat.
|
||||||
|
if LLMRouter is not None:
|
||||||
try:
|
try:
|
||||||
from circuitforge_core.llm.router import LLMRouter
|
|
||||||
return LLMRouter(), nullcontext(None)
|
return LLMRouter(), nullcontext(None)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
|
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
|
||||||
|
|
@ -94,3 +139,4 @@ def get_meal_plan_router():
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("LLMRouter init failed: %s", exc)
|
logger.debug("LLMRouter init failed: %s", exc)
|
||||||
return None, nullcontext(None)
|
return None, nullcontext(None)
|
||||||
|
return None, nullcontext(None)
|
||||||
|
|
|
||||||
|
|
@ -18,43 +18,51 @@ class DocuvisionResult:
|
||||||
class DocuvisionClient:
|
class DocuvisionClient:
|
||||||
"""Thin client for the cf-docuvision service."""
|
"""Thin client for the cf-docuvision service."""
|
||||||
|
|
||||||
def __init__(self, base_url: str) -> None:
|
def __init__(self, base_url: str, timeout: float = 120.0) -> None:
|
||||||
self._base_url = base_url.rstrip("/")
|
self._base_url = base_url.rstrip("/")
|
||||||
|
self._timeout = timeout
|
||||||
|
|
||||||
def extract_text(self, image_path: str | Path) -> DocuvisionResult:
|
def extract_text(self, image_path: str | Path, hint: str = "text") -> DocuvisionResult:
|
||||||
"""Send an image to docuvision and return extracted text."""
|
"""Send an image to docuvision and return extracted text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image_path: Path to the image file.
|
||||||
|
hint: Docuvision extraction hint — "text" for dense prose (recipes),
|
||||||
|
"table" for tabular data, "form" for form fields, "auto" for
|
||||||
|
automatic detection.
|
||||||
|
"""
|
||||||
image_bytes = Path(image_path).read_bytes()
|
image_bytes = Path(image_path).read_bytes()
|
||||||
b64 = base64.b64encode(image_bytes).decode()
|
b64 = base64.b64encode(image_bytes).decode()
|
||||||
|
|
||||||
with httpx.Client(timeout=30.0) as client:
|
with httpx.Client(timeout=self._timeout) as client:
|
||||||
resp = client.post(
|
resp = client.post(
|
||||||
f"{self._base_url}/extract",
|
f"{self._base_url}/extract",
|
||||||
json={"image": b64},
|
json={"image_b64": b64, "hint": hint},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
|
|
||||||
return DocuvisionResult(
|
return DocuvisionResult(
|
||||||
text=data.get("text", ""),
|
text=data.get("raw_text", ""),
|
||||||
confidence=data.get("confidence"),
|
confidence=data.get("metadata", {}).get("confidence"),
|
||||||
raw=data,
|
raw=data,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def extract_text_async(self, image_path: str | Path) -> DocuvisionResult:
|
async def extract_text_async(self, image_path: str | Path, hint: str = "text") -> DocuvisionResult:
|
||||||
"""Async version."""
|
"""Async version."""
|
||||||
image_bytes = Path(image_path).read_bytes()
|
image_bytes = Path(image_path).read_bytes()
|
||||||
b64 = base64.b64encode(image_bytes).decode()
|
b64 = base64.b64encode(image_bytes).decode()
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
async with httpx.AsyncClient(timeout=self._timeout) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{self._base_url}/extract",
|
f"{self._base_url}/extract",
|
||||||
json={"image": b64},
|
json={"image_b64": b64, "hint": hint},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
|
|
||||||
return DocuvisionResult(
|
return DocuvisionResult(
|
||||||
text=data.get("text", ""),
|
text=data.get("raw_text", ""),
|
||||||
confidence=data.get("confidence"),
|
confidence=data.get("metadata", {}).get("confidence"),
|
||||||
raw=data,
|
raw=data,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,29 @@ def _try_docuvision(image_path: str | Path) -> str | None:
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
||||||
if not cf_orch_url:
|
if not cf_orch_url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Tier 1: task-based routing — coordinator owns model selection.
|
||||||
|
try:
|
||||||
|
from app.services.task_inference import task_allocate, TaskNotRegistered
|
||||||
|
from app.services.ocr.docuvision_client import DocuvisionClient
|
||||||
|
try:
|
||||||
|
with task_allocate(
|
||||||
|
"kiwi", "ocr",
|
||||||
|
service_hint="cf-docuvision",
|
||||||
|
ttl_s=60.0,
|
||||||
|
) as alloc:
|
||||||
|
doc_client = DocuvisionClient(alloc.url)
|
||||||
|
result = doc_client.extract_text(image_path)
|
||||||
|
return result.text if result.text else None
|
||||||
|
except TaskNotRegistered:
|
||||||
|
logger.debug(
|
||||||
|
"kiwi.ocr not in coordinator assignments — "
|
||||||
|
"falling back to direct cf-docuvision allocation"
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("task allocation path failed, trying direct allocate: %s", exc)
|
||||||
|
|
||||||
|
# Tier 2: direct allocation — hardcoded service type.
|
||||||
try:
|
try:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
from circuitforge_orch.client import CFOrchClient
|
||||||
from app.services.ocr.docuvision_client import DocuvisionClient
|
from app.services.ocr.docuvision_client import DocuvisionClient
|
||||||
|
|
@ -49,7 +72,7 @@ def _try_docuvision(image_path: str | Path) -> str | None:
|
||||||
result = doc_client.extract_text(image_path)
|
result = doc_client.extract_text(image_path)
|
||||||
return result.text if result.text else None
|
return result.text if result.text else None
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("cf-docuvision fast-path failed, falling back: %s", exc)
|
logger.debug("cf-docuvision fast-path failed, falling back to local VLM: %s", exc)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -196,34 +196,63 @@ def _call_via_local_vlm(image_paths: list[Path], prompt: str) -> str:
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def _build_ocr_extraction_prompt(ocr_text: str) -> str:
|
||||||
|
"""Build a text-LLM prompt for structuring OCR output into recipe JSON.
|
||||||
|
|
||||||
|
Swaps the image-centric preamble of _EXTRACTION_PROMPT for an OCR-centric
|
||||||
|
one, then appends the combined OCR text as input. The JSON schema section
|
||||||
|
is shared verbatim to keep the two paths in sync.
|
||||||
|
"""
|
||||||
|
schema_idx = _EXTRACTION_PROMPT.find("Return a single JSON object")
|
||||||
|
schema_part = _EXTRACTION_PROMPT[schema_idx:] if schema_idx != -1 else _EXTRACTION_PROMPT
|
||||||
|
return (
|
||||||
|
"You are extracting a recipe from OCR text taken from a recipe card, "
|
||||||
|
"cookbook page, or handwritten note.\n\n"
|
||||||
|
"The text below was obtained via optical character recognition and may "
|
||||||
|
"contain minor scanning artifacts or formatting irregularities.\n\n"
|
||||||
|
f"{schema_part}\n\nOCR Text:\n{ocr_text}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _call_vision_backend(image_paths: list[Path], prompt: str) -> str:
|
def _call_vision_backend(image_paths: list[Path], prompt: str) -> str:
|
||||||
"""Dispatch to the best available vision backend.
|
"""Dispatch to the best available vision backend.
|
||||||
|
|
||||||
Priority: cf-orch vision -> local Qwen2.5-VL -> Anthropic API.
|
Priority: cf-orch docuvision (OCR + text LLM) -> local Qwen2.5-VL -> Anthropic API.
|
||||||
Raises RuntimeError with a clear message when no backend is available.
|
Raises RuntimeError with a clear message when no backend is available.
|
||||||
"""
|
"""
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
|
|
||||||
# 1. Try cf-orch vision allocation
|
# 1. Try cf-orch task allocation → cf-docuvision OCR, then text LLM structuring.
|
||||||
|
# Two-step: docuvision extracts text from the image(s), then LLMRouter
|
||||||
|
# converts the OCR text to structured recipe JSON using the extraction prompt.
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
||||||
if cf_orch_url:
|
if cf_orch_url:
|
||||||
try:
|
try:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
from app.services.task_inference import TaskNotRegistered, task_allocate
|
||||||
from app.services.ocr.docuvision_client import DocuvisionClient
|
from app.services.ocr.docuvision_client import DocuvisionClient
|
||||||
|
from circuitforge_core.llm.router import LLMRouter
|
||||||
|
|
||||||
client = CFOrchClient(cf_orch_url)
|
try:
|
||||||
with client.allocate(
|
with task_allocate("kiwi", "recipe_scan", service_hint="cf-docuvision", ttl_s=120.0) as alloc:
|
||||||
service="cf-vision",
|
# Step 1: OCR each image via cf-docuvision
|
||||||
model_candidates=["qwen2.5-vl-7b", "cf-docuvision"],
|
|
||||||
ttl_s=90.0,
|
|
||||||
caller="kiwi-recipe-scan",
|
|
||||||
) as alloc:
|
|
||||||
if alloc is not None:
|
|
||||||
doc_client = DocuvisionClient(alloc.url)
|
doc_client = DocuvisionClient(alloc.url)
|
||||||
# docuvision takes a single image -- use first image only for now
|
ocr_parts: list[str] = []
|
||||||
result = doc_client.extract_text(image_paths[0])
|
for i, path in enumerate(image_paths):
|
||||||
if result.text:
|
result = doc_client.extract_text(path, hint="text")
|
||||||
return result.text
|
prefix = f"(Page {i + 1} of the same recipe)\n" if len(image_paths) > 1 else ""
|
||||||
|
ocr_parts.append(f"{prefix}{result.text}")
|
||||||
|
combined_ocr = "\n\n".join(ocr_parts)
|
||||||
|
|
||||||
|
if not combined_ocr.strip():
|
||||||
|
raise ValueError("Docuvision returned no text — image may not be a recipe")
|
||||||
|
|
||||||
|
# Step 2: Text LLM structures OCR output into recipe JSON
|
||||||
|
text = LLMRouter().complete(_build_ocr_extraction_prompt(combined_ocr))
|
||||||
|
if text:
|
||||||
|
return text
|
||||||
|
|
||||||
|
except TaskNotRegistered:
|
||||||
|
logger.debug("kiwi.recipe_scan not yet registered in cf-orch assignments")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("cf-orch vision failed for recipe scan: %s", exc)
|
logger.debug("cf-orch vision failed for recipe scan: %s", exc)
|
||||||
errors.append(f"cf-orch: {exc}")
|
errors.append(f"cf-orch: {exc}")
|
||||||
|
|
|
||||||
124
app/services/task_inference.py
Normal file
124
app/services/task_inference.py
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
# app/services/task_inference.py
|
||||||
|
# BSL 1.1 — LLM feature
|
||||||
|
"""Task-based service allocation via the cf-orch coordinator.
|
||||||
|
|
||||||
|
Calls POST /api/inference/task instead of a hardcoded service type.
|
||||||
|
The coordinator resolves model_id and service_type from assignments.yaml.
|
||||||
|
|
||||||
|
Fallback contract (for callers):
|
||||||
|
- 404 → TaskNotRegistered (fall back to direct client.allocate())
|
||||||
|
- other error → RuntimeError
|
||||||
|
- CF_ORCH_URL unset → RuntimeError (guard with os.environ.get first)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from collections.abc import Generator
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskNotRegistered(Exception):
|
||||||
|
"""Coordinator returned 404 for a product/task pair.
|
||||||
|
|
||||||
|
Means the task is not yet in assignments.yaml. Callers should fall
|
||||||
|
back to direct service allocation (client.allocate()).
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Allocation:
|
||||||
|
url: str
|
||||||
|
allocation_id: str
|
||||||
|
service: str
|
||||||
|
|
||||||
|
|
||||||
|
def _orch_url() -> str:
|
||||||
|
return os.environ.get("CF_ORCH_URL", "").rstrip("/")
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def task_allocate(
|
||||||
|
product: str,
|
||||||
|
task: str,
|
||||||
|
*,
|
||||||
|
service_hint: str,
|
||||||
|
ttl_s: float = 120.0,
|
||||||
|
) -> Generator[Allocation, None, None]:
|
||||||
|
"""Context manager: allocate a service via task-based routing.
|
||||||
|
|
||||||
|
Calls POST /api/inference/task, yields Allocation, releases on exit.
|
||||||
|
Supports both `with task_allocate(...) as alloc:` and manual
|
||||||
|
`ctx = task_allocate(...); alloc = ctx.__enter__()` patterns.
|
||||||
|
|
||||||
|
**Sync-only**: uses the synchronous httpx API. Do not call from an
|
||||||
|
``async def`` handler without wrapping in ``asyncio.to_thread``. Current
|
||||||
|
call sites (``llm_router.py``, ``vl_model.py``) are synchronous.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
product: CF product name (e.g. "kiwi")
|
||||||
|
task: Task identifier (e.g. "meal_plan", "ocr")
|
||||||
|
service_hint: Service type for the release DELETE call. The
|
||||||
|
coordinator response does not include service_type, so the
|
||||||
|
caller provides it. When the coordinator is updated to return
|
||||||
|
service in the response (cf-orch#63), this becomes unused.
|
||||||
|
ttl_s: Allocation TTL in seconds.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
TaskNotRegistered: Coordinator returned 404.
|
||||||
|
RuntimeError: Coordinator unreachable, returned non-404 error, or
|
||||||
|
returned a malformed (non-JSON / missing fields) response.
|
||||||
|
RuntimeError: CF_ORCH_URL is not set.
|
||||||
|
"""
|
||||||
|
base = _orch_url()
|
||||||
|
if not base:
|
||||||
|
raise RuntimeError("CF_ORCH_URL is not set")
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = httpx.post(
|
||||||
|
f"{base}/api/inference/task",
|
||||||
|
json={"product": product, "task": task, "payload": {}},
|
||||||
|
timeout=30.0,
|
||||||
|
)
|
||||||
|
except httpx.RequestError as exc:
|
||||||
|
raise RuntimeError(f"cf-orch unreachable: {exc}") from exc
|
||||||
|
|
||||||
|
if resp.status_code == 404:
|
||||||
|
raise TaskNotRegistered(
|
||||||
|
f"No assignment for product={product!r} task={task!r} — "
|
||||||
|
"ensure cf-orch#61/62 are deployed and coordinator reloaded"
|
||||||
|
)
|
||||||
|
if not resp.is_success:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"cf-orch /api/inference/task failed: "
|
||||||
|
f"HTTP {resp.status_code} — {resp.text[:200]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = resp.json()
|
||||||
|
alloc = Allocation(
|
||||||
|
url=data["url"],
|
||||||
|
allocation_id=data["allocation_id"],
|
||||||
|
service=data.get("service") or service_hint,
|
||||||
|
)
|
||||||
|
except (KeyError, ValueError) as exc:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"cf-orch /api/inference/task returned malformed response: {exc} — "
|
||||||
|
f"body: {resp.text[:200]}"
|
||||||
|
) from exc
|
||||||
|
|
||||||
|
try:
|
||||||
|
yield alloc
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
httpx.delete(
|
||||||
|
f"{base}/api/services/{alloc.service}/allocations/{alloc.allocation_id}",
|
||||||
|
timeout=10.0,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("cf-orch task allocation release failed (non-fatal): %s", exc)
|
||||||
127
tests/services/test_llm_router_task.py
Normal file
127
tests/services/test_llm_router_task.py
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
"""Tests for task-based routing added to get_meal_plan_router()."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
|
||||||
|
"""Mock context manager returned by task_allocate()."""
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
alloc.allocation_id = "alloc-task-1"
|
||||||
|
alloc.service = "cf-text"
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx_not_registered() -> MagicMock:
|
||||||
|
"""Mock context manager that raises TaskNotRegistered on enter."""
|
||||||
|
from app.services.task_inference import TaskNotRegistered
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(side_effect=TaskNotRegistered("not registered"))
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
|
||||||
|
"""Mock context manager returned by CFOrchClient.allocate()."""
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_path_returns_orch_router_on_success(monkeypatch):
|
||||||
|
"""get_meal_plan_router() returns _OrchTextRouter when task allocation succeeds."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
import unittest.mock as um
|
||||||
|
# Patch the name as it exists in llm_router's own namespace (module-level import).
|
||||||
|
with um.patch("app.services.meal_plan.llm_router.task_allocate",
|
||||||
|
return_value=_make_task_ctx(url="http://node:9001")):
|
||||||
|
from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
|
||||||
|
router, ctx = get_meal_plan_router()
|
||||||
|
|
||||||
|
assert isinstance(router, _OrchTextRouter)
|
||||||
|
assert router._base_url == "http://node:9001"
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
|
||||||
|
"""get_meal_plan_router() falls back to direct cf-text allocation on TaskNotRegistered."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
direct_ctx = _make_direct_alloc_ctx(url="http://node:9002")
|
||||||
|
|
||||||
|
import unittest.mock as um
|
||||||
|
# Patch task_allocate in llm_router's namespace so TaskNotRegistered is raised.
|
||||||
|
with um.patch("app.services.meal_plan.llm_router.task_allocate",
|
||||||
|
return_value=_make_task_ctx_not_registered()), \
|
||||||
|
um.patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient:
|
||||||
|
MockClient.return_value.allocate.return_value = direct_ctx
|
||||||
|
from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
|
||||||
|
router, ctx = get_meal_plan_router()
|
||||||
|
|
||||||
|
assert isinstance(router, _OrchTextRouter)
|
||||||
|
assert router._base_url == "http://node:9002"
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_cf_orch_url_returns_llm_router(monkeypatch):
|
||||||
|
"""get_meal_plan_router() returns LLMRouter when CF_ORCH_URL is not set."""
|
||||||
|
monkeypatch.delenv("CF_ORCH_URL", raising=False)
|
||||||
|
|
||||||
|
import unittest.mock as um
|
||||||
|
mock_lr = MagicMock()
|
||||||
|
with um.patch("app.services.meal_plan.llm_router.LLMRouter", return_value=mock_lr):
|
||||||
|
from app.services.meal_plan.llm_router import get_meal_plan_router
|
||||||
|
router, ctx = get_meal_plan_router()
|
||||||
|
|
||||||
|
assert router is mock_lr
|
||||||
|
|
||||||
|
|
||||||
|
def test_tier1_general_exception_falls_back_to_direct_allocate(monkeypatch):
|
||||||
|
"""get_meal_plan_router() falls back to direct allocation when task_allocate raises RuntimeError."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
direct_ctx = _make_direct_alloc_ctx(url="http://node:9003")
|
||||||
|
|
||||||
|
import unittest.mock as um
|
||||||
|
failing_ctx = MagicMock()
|
||||||
|
failing_ctx.__enter__ = MagicMock(side_effect=RuntimeError("coordinator down"))
|
||||||
|
failing_ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
|
with um.patch("app.services.meal_plan.llm_router.task_allocate",
|
||||||
|
return_value=failing_ctx), \
|
||||||
|
um.patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient:
|
||||||
|
MockClient.return_value.allocate.return_value = direct_ctx
|
||||||
|
from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
|
||||||
|
router, ctx = get_meal_plan_router()
|
||||||
|
|
||||||
|
assert isinstance(router, _OrchTextRouter)
|
||||||
|
assert router._base_url == "http://node:9003"
|
||||||
|
|
||||||
|
|
||||||
|
def test_tier2_none_alloc_releases_ctx_and_falls_through(monkeypatch):
|
||||||
|
"""get_meal_plan_router() releases Tier 2 ctx and falls through when alloc is None."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
|
||||||
|
import unittest.mock as um
|
||||||
|
|
||||||
|
none_alloc_ctx = MagicMock()
|
||||||
|
none_alloc_ctx.__enter__ = MagicMock(return_value=None)
|
||||||
|
none_alloc_ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
|
||||||
|
mock_lr = MagicMock()
|
||||||
|
|
||||||
|
with um.patch("app.services.meal_plan.llm_router.task_allocate",
|
||||||
|
return_value=_make_task_ctx_not_registered()), \
|
||||||
|
um.patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient, \
|
||||||
|
um.patch("app.services.meal_plan.llm_router.LLMRouter", return_value=mock_lr):
|
||||||
|
MockClient.return_value.allocate.return_value = none_alloc_ctx
|
||||||
|
from app.services.meal_plan.llm_router import get_meal_plan_router
|
||||||
|
router, ctx = get_meal_plan_router()
|
||||||
|
|
||||||
|
assert router is mock_lr
|
||||||
|
none_alloc_ctx.__exit__.assert_called_once_with(None, None, None)
|
||||||
164
tests/services/test_task_inference.py
Normal file
164
tests/services/test_task_inference.py
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
"""Tests for app/services/task_inference.py"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _ok_resp(url: str = "http://node:8080", allocation_id: str = "alloc-123") -> MagicMock:
|
||||||
|
m = MagicMock()
|
||||||
|
m.status_code = 200
|
||||||
|
m.is_success = True
|
||||||
|
m.json.return_value = {
|
||||||
|
"url": url,
|
||||||
|
"allocation_id": allocation_id,
|
||||||
|
"gpu_id": 0,
|
||||||
|
"started": True,
|
||||||
|
"warm": False,
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
def _err_resp(status_code: int, text: str = "error") -> MagicMock:
|
||||||
|
m = MagicMock()
|
||||||
|
m.status_code = status_code
|
||||||
|
m.is_success = False
|
||||||
|
m.text = text
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_yields_allocation_on_200(monkeypatch):
|
||||||
|
"""task_allocate() yields Allocation with url, allocation_id, service on 200."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=_ok_resp()) as mock_post, \
|
||||||
|
patch("app.services.task_inference.httpx.delete") as mock_del:
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text") as alloc:
|
||||||
|
assert alloc.url == "http://node:8080"
|
||||||
|
assert alloc.allocation_id == "alloc-123"
|
||||||
|
assert alloc.service == "cf-text"
|
||||||
|
called_url = mock_post.call_args[0][0]
|
||||||
|
assert called_url == "http://coord:7700/api/inference/task"
|
||||||
|
mock_del.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_uses_service_from_response_when_present(monkeypatch):
|
||||||
|
"""task_allocate() uses service from response dict over service_hint when available."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
resp = _ok_resp()
|
||||||
|
resp.json.return_value["service"] = "cf-vision"
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=resp), \
|
||||||
|
patch("app.services.task_inference.httpx.delete"):
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with task_allocate("kiwi", "ocr", service_hint="cf-docuvision") as alloc:
|
||||||
|
assert alloc.service == "cf-vision"
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_404_raises_task_not_registered(monkeypatch):
|
||||||
|
"""task_allocate() raises TaskNotRegistered on coordinator 404."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=_err_resp(404)):
|
||||||
|
from app.services.task_inference import task_allocate, TaskNotRegistered
|
||||||
|
with pytest.raises(TaskNotRegistered):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_503_raises_runtime_error(monkeypatch):
|
||||||
|
"""task_allocate() raises RuntimeError on non-404 coordinator errors."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=_err_resp(503, "no GPU")):
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with pytest.raises(RuntimeError, match="HTTP 503"):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_release_called_on_clean_exit(monkeypatch):
|
||||||
|
"""task_allocate() DELETEs the allocation on clean context exit."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=_ok_resp(allocation_id="xyz")), \
|
||||||
|
patch("app.services.task_inference.httpx.delete") as mock_del:
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
|
release_url = mock_del.call_args[0][0]
|
||||||
|
assert "cf-text" in release_url
|
||||||
|
assert "xyz" in release_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_release_called_when_inner_block_raises(monkeypatch):
|
||||||
|
"""task_allocate() DELETEs the allocation even when the inner block raises."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=_ok_resp(allocation_id="abc")), \
|
||||||
|
patch("app.services.task_inference.httpx.delete") as mock_del:
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
raise ValueError("inner error")
|
||||||
|
mock_del.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_release_failure_is_swallowed(monkeypatch):
|
||||||
|
"""task_allocate() does not propagate DELETE failures."""
|
||||||
|
import httpx as _httpx
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=_ok_resp()), \
|
||||||
|
patch("app.services.task_inference.httpx.delete",
|
||||||
|
side_effect=_httpx.RequestError("gone", request=MagicMock())):
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text") as alloc:
|
||||||
|
assert alloc.url == "http://node:8080"
|
||||||
|
# no exception raised
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_no_orch_url_raises_runtime_error(monkeypatch):
|
||||||
|
"""task_allocate() raises RuntimeError when CF_ORCH_URL is not set."""
|
||||||
|
monkeypatch.delenv("CF_ORCH_URL", raising=False)
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with pytest.raises(RuntimeError, match="CF_ORCH_URL"):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_network_error_raises_runtime_error(monkeypatch):
|
||||||
|
"""task_allocate() wraps httpx.RequestError in RuntimeError."""
|
||||||
|
import httpx as _httpx
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
with patch("app.services.task_inference.httpx.post",
|
||||||
|
side_effect=_httpx.RequestError("timeout", request=MagicMock())):
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with pytest.raises(RuntimeError, match="unreachable"):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_malformed_json_raises_runtime_error(monkeypatch):
|
||||||
|
"""task_allocate() raises RuntimeError when coordinator returns non-JSON on 200."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
bad_resp = MagicMock()
|
||||||
|
bad_resp.status_code = 200
|
||||||
|
bad_resp.is_success = True
|
||||||
|
bad_resp.text = "<html>proxy error</html>"
|
||||||
|
bad_resp.json.side_effect = ValueError("not json")
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=bad_resp):
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with pytest.raises(RuntimeError, match="malformed"):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_missing_url_field_raises_runtime_error(monkeypatch):
|
||||||
|
"""task_allocate() raises RuntimeError when coordinator response is missing url field."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
bad_resp = MagicMock()
|
||||||
|
bad_resp.status_code = 200
|
||||||
|
bad_resp.is_success = True
|
||||||
|
bad_resp.text = '{"allocation_id": "x"}'
|
||||||
|
bad_resp.json.return_value = {"allocation_id": "x"} # missing "url"
|
||||||
|
with patch("app.services.task_inference.httpx.post", return_value=bad_resp):
|
||||||
|
from app.services.task_inference import task_allocate
|
||||||
|
with pytest.raises(RuntimeError, match="malformed"):
|
||||||
|
with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
|
||||||
|
pass
|
||||||
88
tests/services/test_vl_model_task.py
Normal file
88
tests/services/test_vl_model_task.py
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
"""Tests for task-based routing added to _try_docuvision()."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_doc_result(text: str = "RECEIPT TEXT") -> MagicMock:
|
||||||
|
r = MagicMock()
|
||||||
|
r.text = text
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx(url: str = "http://node:9010") -> MagicMock:
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
alloc.allocation_id = "alloc-vis-1"
|
||||||
|
alloc.service = "cf-docuvision"
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_not_registered() -> MagicMock:
|
||||||
|
from app.services.task_inference import TaskNotRegistered
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(side_effect=TaskNotRegistered("not registered"))
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_direct_alloc(url: str = "http://node:9011") -> MagicMock:
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def test_try_docuvision_task_path_returns_text(monkeypatch, tmp_path):
|
||||||
|
"""_try_docuvision() uses task allocation and returns extracted text on success."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
fake_image = tmp_path / "receipt.jpg"
|
||||||
|
fake_image.write_bytes(b"fake")
|
||||||
|
|
||||||
|
with patch("app.services.task_inference.task_allocate",
|
||||||
|
return_value=_make_task_ctx(url="http://node:9010")), \
|
||||||
|
patch("app.services.ocr.docuvision_client.DocuvisionClient") as MockDoc:
|
||||||
|
MockDoc.return_value.extract_text.return_value = _mock_doc_result("STORE $12.34")
|
||||||
|
from app.services.ocr.vl_model import _try_docuvision
|
||||||
|
result = _try_docuvision(str(fake_image))
|
||||||
|
|
||||||
|
assert result == "STORE $12.34"
|
||||||
|
MockDoc.assert_called_once_with("http://node:9010")
|
||||||
|
|
||||||
|
|
||||||
|
def test_try_docuvision_falls_back_to_direct_on_task_not_registered(monkeypatch, tmp_path):
|
||||||
|
"""_try_docuvision() falls back to direct cf-docuvision allocation on TaskNotRegistered."""
|
||||||
|
monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
|
||||||
|
fake_image = tmp_path / "receipt.jpg"
|
||||||
|
fake_image.write_bytes(b"fake")
|
||||||
|
|
||||||
|
with patch("app.services.task_inference.task_allocate",
|
||||||
|
return_value=_make_task_not_registered()), \
|
||||||
|
patch("circuitforge_orch.client.CFOrchClient") as MockClient, \
|
||||||
|
patch("app.services.ocr.docuvision_client.DocuvisionClient") as MockDoc:
|
||||||
|
MockClient.return_value.allocate.return_value = _make_direct_alloc("http://node:9011")
|
||||||
|
MockDoc.return_value.extract_text.return_value = _mock_doc_result("FALLBACK TEXT")
|
||||||
|
from app.services.ocr.vl_model import _try_docuvision
|
||||||
|
result = _try_docuvision(str(fake_image))
|
||||||
|
|
||||||
|
assert result == "FALLBACK TEXT"
|
||||||
|
MockDoc.assert_called_once_with("http://node:9011")
|
||||||
|
|
||||||
|
|
||||||
|
def test_try_docuvision_returns_none_without_cf_orch_url(monkeypatch, tmp_path):
|
||||||
|
"""_try_docuvision() returns None immediately when CF_ORCH_URL is not set."""
|
||||||
|
monkeypatch.delenv("CF_ORCH_URL", raising=False)
|
||||||
|
fake_image = tmp_path / "receipt.jpg"
|
||||||
|
fake_image.write_bytes(b"fake")
|
||||||
|
|
||||||
|
from app.services.ocr.vl_model import _try_docuvision
|
||||||
|
result = _try_docuvision(str(fake_image))
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
@ -17,12 +17,17 @@ from app.services.ocr.docuvision_client import DocuvisionClient, DocuvisionResul
|
||||||
|
|
||||||
|
|
||||||
def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
|
def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
|
||||||
"""extract_text() POSTs a base64-encoded image and returns parsed text."""
|
"""extract_text() POSTs image_b64 and returns parsed raw_text."""
|
||||||
image_file = tmp_path / "test.jpg"
|
image_file = tmp_path / "test.jpg"
|
||||||
image_file.write_bytes(b"fake-image-bytes")
|
image_file.write_bytes(b"fake-image-bytes")
|
||||||
|
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_response.json.return_value = {"text": "Cheerios", "confidence": 0.95}
|
mock_response.json.return_value = {
|
||||||
|
"raw_text": "Cheerios",
|
||||||
|
"elements": [],
|
||||||
|
"tables": [],
|
||||||
|
"metadata": {"hint": "text", "confidence": 0.95},
|
||||||
|
}
|
||||||
mock_response.raise_for_status.return_value = None
|
mock_response.raise_for_status.return_value = None
|
||||||
|
|
||||||
with patch("httpx.Client") as mock_client_cls:
|
with patch("httpx.Client") as mock_client_cls:
|
||||||
|
|
@ -41,7 +46,8 @@ def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
|
||||||
assert call_kwargs[0][0] == "http://docuvision:8080/extract"
|
assert call_kwargs[0][0] == "http://docuvision:8080/extract"
|
||||||
posted_json = call_kwargs[1]["json"]
|
posted_json = call_kwargs[1]["json"]
|
||||||
expected_b64 = base64.b64encode(b"fake-image-bytes").decode()
|
expected_b64 = base64.b64encode(b"fake-image-bytes").decode()
|
||||||
assert posted_json["image"] == expected_b64
|
assert posted_json["image_b64"] == expected_b64
|
||||||
|
assert posted_json["hint"] == "text"
|
||||||
|
|
||||||
|
|
||||||
def test_extract_text_raises_on_http_error(tmp_path: Path) -> None:
|
def test_extract_text_raises_on_http_error(tmp_path: Path) -> None:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue