kiwi/app/services/ocr/docuvision_client.py
pyr0ball 22e57118df feat: add DocuvisionClient + cf-docuvision fast-path for OCR
Introduces a thin HTTP client for the cf-docuvision service and wires it
as a fast path in VisionLanguageOCR.extract_receipt_data(). When CF_ORCH_URL
is set, the pipeline attempts docuvision allocation via CFOrchClient before
loading the heavy local VLM; falls back gracefully if unavailable.
2026-04-02 12:33:05 -07:00

60 lines
1.7 KiB
Python

"""Thin HTTP client for the cf-docuvision document vision service."""
from __future__ import annotations

import asyncio
import base64
from dataclasses import dataclass
from pathlib import Path

import httpx
@dataclass
class DocuvisionResult:
    """Outcome of a single docuvision text-extraction request.

    Attributes:
        text: The extracted text.
        confidence: Confidence value reported with the extraction, or
            ``None`` when none was provided.
        raw: The decoded response payload the result was built from, or
            ``None`` when it was not retained.
    """

    text: str
    confidence: float | None = None
    raw: dict | None = None
class DocuvisionClient:
    """Thin client for the cf-docuvision service.

    Each extraction call reads an image file, base64-encodes it, and POSTs
    it as ``{"image": <base64>}`` JSON to ``<base_url>/extract``. The JSON
    reply is wrapped in a ``DocuvisionResult``.

    Args:
        base_url: Root URL of the service; trailing slashes are stripped so
            the endpoint path can be appended safely.
        timeout: Per-request timeout in seconds handed to httpx. Defaults
            to ``DEFAULT_TIMEOUT``.
    """

    DEFAULT_TIMEOUT = 30.0

    def __init__(self, base_url: str, *, timeout: float = DEFAULT_TIMEOUT) -> None:
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout

    @staticmethod
    def _encode_image(image_path: str | Path) -> str:
        """Read *image_path* and return its contents as base64 text."""
        return base64.b64encode(Path(image_path).read_bytes()).decode()

    @staticmethod
    def _to_result(data: dict) -> DocuvisionResult:
        """Build a ``DocuvisionResult`` from the decoded JSON response."""
        text = data.get("text")
        return DocuvisionResult(
            # A missing or JSON-null "text" becomes "" so the field is
            # always a str, as its annotation promises.
            text="" if text is None else text,
            confidence=data.get("confidence"),
            raw=data,
        )

    def extract_text(self, image_path: str | Path) -> DocuvisionResult:
        """Send an image to docuvision and return the extracted text.

        Args:
            image_path: Path of the image file to upload.

        Returns:
            A ``DocuvisionResult`` built from the service's JSON reply.

        Raises:
            OSError: If the image file cannot be read.
            httpx.HTTPStatusError: If the service answers with an error
                status (via ``raise_for_status``).
        """
        payload = {"image": self._encode_image(image_path)}
        with httpx.Client(timeout=self._timeout) as client:
            resp = client.post(f"{self._base_url}/extract", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._to_result(data)

    async def extract_text_async(self, image_path: str | Path) -> DocuvisionResult:
        """Async counterpart of :meth:`extract_text`.

        The file read and base64 encoding run in the event loop's default
        executor so a slow disk or large image does not block other tasks.

        Args:
            image_path: Path of the image file to upload.

        Returns:
            A ``DocuvisionResult`` built from the service's JSON reply.

        Raises:
            OSError: If the image file cannot be read.
            httpx.HTTPStatusError: If the service answers with an error
                status (via ``raise_for_status``).
        """
        loop = asyncio.get_running_loop()
        encoded = await loop.run_in_executor(None, self._encode_image, image_path)
        async with httpx.AsyncClient(timeout=self._timeout) as client:
            resp = await client.post(
                f"{self._base_url}/extract",
                json={"image": encoded},
            )
            resp.raise_for_status()
            data = resp.json()
        return self._to_result(data)