FastAPI microservice wrapping ByteDance/Dolphin-v2 (Qwen2.5-VL-3B base) for structured document extraction. Exposes POST /extract and GET /health. Maps Dolphin's 21 element types to cf-core's 7-type canonical schema. Services: cf-text /extract, /health. Env vars: CF_DOCUVISION_MODEL, CF_DOCUVISION_DEVICE, CF_DOCUVISION_PORT. GPU: 8GB+ VRAM required for Dolphin-v2; a CPU fallback is available but very slow.
113 lines
3.6 KiB
Python
113 lines
3.6 KiB
Python
# app/main.py — cf-docuvision FastAPI service
|
|
#
|
|
# Exposes POST /extract and GET /health.
|
|
# Response schema matches DocuvisionClient._parse_response() in cf-core.
|
|
#
|
|
# Start:
|
|
# uvicorn app.main:app --host 0.0.0.0 --port 8003
|
|
# CF_DOCUVISION_DEVICE=cuda uvicorn app.main:app ...
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import time
|
|
from contextlib import asynccontextmanager
|
|
from typing import Any
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
from pydantic import BaseModel
|
|
|
|
from app.dolphin import DolphinParser, dolphin_to_cf_elements
|
|
|
|
# Module-level logger used by the lifespan handler and the endpoints.
logger = logging.getLogger(__name__)

# Level names known to the logging module are upper-case, so normalise
# LOG_LEVEL before handing it over: a value such as "debug" would otherwise be
# rejected with ValueError ("Unknown level") while this module is imported.
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO").upper())


# ── Model lifecycle ───────────────────────────────────────────────────────────

# Process-wide parser instance. Assigned by lifespan() on startup and reset to
# None on shutdown; the endpoints answer 503 while it is None.
_parser: DolphinParser | None = None
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the Dolphin parser on startup and release it on shutdown.

    Passed to ``FastAPI(lifespan=...)``: the code before ``yield`` runs as the
    application starts, the code after it runs as the application stops.

    Args:
        app: The FastAPI application being started (not used here).

    Yields:
        None. The loaded parser is published through the module-level
        ``_parser`` global rather than through lifespan state.

    Raises:
        Whatever ``DolphinParser.from_env()`` raises; ``_parser`` is not
        assigned in that case.
    """
    global _parser
    logger.info("cf-docuvision: loading Dolphin-v2...")
    _parser = DolphinParser.from_env()
    logger.info("cf-docuvision: ready")
    try:
        yield
    finally:
        # Drop the reference even when shutdown is caused by an exception or
        # cancellation thrown into the generator at the yield point, so the
        # parser is never left published after the application has stopped.
        _parser = None
|
|
|
|
|
|
# ASGI application object; this is the `app` that `uvicorn app.main:app` serves.
# The lifespan handler is what loads the parser and clears it again on shutdown.
app = FastAPI(
    lifespan=lifespan,
    title="cf-docuvision",
    version="0.1.0",
    description="Dolphin-v2 document parsing service for CircuitForge products.",
)
|
|
|
|
|
|
# ── Request / Response schemas ────────────────────────────────────────────────
|
|
|
|
class ExtractRequest(BaseModel):
    # Request body accepted by POST /extract.

    # Base64 text of the document image; handed to the parser as-is.
    image_b64: str

    # Extraction focus. The /extract handler accepts "auto", "table", "text"
    # and "form", and answers 422 for any other value.
    hint: str = "auto"
|
|
|
|
|
|
class ExtractResponse(BaseModel):
    # Response body returned by POST /extract. Per the module header, the shape
    # matches DocuvisionClient._parse_response() in cf-core.

    # First value returned by dolphin_to_cf_elements() for the parse result.
    elements: list[dict[str, Any]]

    # Second value returned by dolphin_to_cf_elements() for the parse result.
    tables: list[dict[str, Any]]

    # The parse result's raw_text attribute, passed through unchanged.
    raw_text: str

    # Bookkeeping filled in by the handler: "source", "model", "hint" and
    # "elapsed_ms".
    metadata: dict[str, Any]
|
|
|
|
|
|
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
|
|
|
@app.get("/health")
def health():
    """Health check. Returns 200 when the model is loaded and ready."""
    # _parser is only non-None between lifespan startup and shutdown.
    if _parser is not None:
        # NOTE(review): this reads a private attribute of DolphinParser;
        # confirm whether the parser exposes a public accessor for the model id.
        return {"status": "ok", "model": _parser._model_id}

    # No parser published: tell the caller the service is not ready.
    raise HTTPException(status_code=503, detail="Model not loaded")
|
|
|
|
|
|
@app.post("/extract", response_model=ExtractResponse)
def extract(req: ExtractRequest):
    """
    Parse a document image into structured elements.

    Request body:
      image_b64   Base64-encoded image bytes (JPEG, PNG, TIFF, PDF page, etc.)
      hint        Extraction focus: "auto" | "table" | "text" | "form"

    Response matches the DocuvisionClient._parse_response() contract in cf-core.
    """
    # Readiness is checked before input validation, so an unloaded model
    # always surfaces as 503 regardless of the request contents.
    if _parser is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    accepted_hints = ("auto", "table", "text", "form")
    if req.hint not in accepted_hints:
        raise HTTPException(status_code=422, detail=f"Invalid hint {req.hint!r}")

    # The timer covers both the model call and the schema conversion below.
    started = time.monotonic()
    try:
        parsed = _parser.parse_b64(req.image_b64, hint=req.hint)
    except Exception as err:
        # Record the full traceback server-side; the client receives only the
        # exception message as the 500 detail.
        logger.exception("cf-docuvision: parse failed")
        raise HTTPException(status_code=500, detail=str(err)) from err

    # Conversion runs outside the try block: failures here are not turned
    # into the HTTPException above.
    elements, tables = dolphin_to_cf_elements(parsed)
    elapsed_ms = round((time.monotonic() - started) * 1000)
    logger.info(
        "cf-docuvision: extracted %d elements, %d tables in %dms",
        len(elements), len(tables), elapsed_ms,
    )

    raw_text = parsed.raw_text
    metadata = {
        "source": "cf-docuvision",
        "model": parsed.model,
        "hint": req.hint,
        "elapsed_ms": elapsed_ms,
    }
    return ExtractResponse(
        elements=elements,
        tables=tables,
        raw_text=raw_text,
        metadata=metadata,
    )
|