Introduces a thin HTTP client for the cf-docuvision service and wires it as a fast path in VisionLanguageOCR.extract_receipt_data(). When CF_ORCH_URL is set, the pipeline attempts docuvision allocation via CFOrchClient before loading the heavy local VLM; falls back gracefully if unavailable.
60 lines
1.7 KiB
Python
60 lines
1.7 KiB
Python
"""Thin HTTP client for the cf-docuvision document vision service."""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
|
|
|
|
@dataclass
|
|
class DocuvisionResult:
|
|
text: str
|
|
confidence: float | None = None
|
|
raw: dict | None = None
|
|
|
|
|
|
class DocuvisionClient:
|
|
"""Thin client for the cf-docuvision service."""
|
|
|
|
def __init__(self, base_url: str) -> None:
|
|
self._base_url = base_url.rstrip("/")
|
|
|
|
def extract_text(self, image_path: str | Path) -> DocuvisionResult:
|
|
"""Send an image to docuvision and return extracted text."""
|
|
image_bytes = Path(image_path).read_bytes()
|
|
b64 = base64.b64encode(image_bytes).decode()
|
|
|
|
with httpx.Client(timeout=30.0) as client:
|
|
resp = client.post(
|
|
f"{self._base_url}/extract",
|
|
json={"image": b64},
|
|
)
|
|
resp.raise_for_status()
|
|
data = resp.json()
|
|
|
|
return DocuvisionResult(
|
|
text=data.get("text", ""),
|
|
confidence=data.get("confidence"),
|
|
raw=data,
|
|
)
|
|
|
|
async def extract_text_async(self, image_path: str | Path) -> DocuvisionResult:
|
|
"""Async version."""
|
|
image_bytes = Path(image_path).read_bytes()
|
|
b64 = base64.b64encode(image_bytes).decode()
|
|
|
|
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
resp = await client.post(
|
|
f"{self._base_url}/extract",
|
|
json={"image": b64},
|
|
)
|
|
resp.raise_for_status()
|
|
data = resp.json()
|
|
|
|
return DocuvisionResult(
|
|
text=data.get("text", ""),
|
|
confidence=data.get("confidence"),
|
|
raw=data,
|
|
)
|