cf-docuvision/app/main.py
pyr0ball 47d4dfc786 feat: initial cf-docuvision service — Dolphin-v2 document parsing
FastAPI microservice wrapping ByteDance/Dolphin-v2 (Qwen2.5-VL-3B base)
for structured document extraction. Exposes POST /extract and GET /health.
Maps Dolphin's 21 element types to cf-core's 7-type canonical schema.

Services: cf-docuvision /extract, /health
Env vars: CF_DOCUVISION_MODEL, CF_DOCUVISION_DEVICE, CF_DOCUVISION_PORT
GPU: 8GB+ VRAM required for Dolphin-v2; CPU fallback available but very slow.
2026-06-05 10:25:18 -07:00

113 lines
3.6 KiB
Python

# app/main.py — cf-docuvision FastAPI service
#
# Exposes POST /extract and GET /health.
# Response schema matches DocuvisionClient._parse_response() in cf-core.
#
# Start:
# uvicorn app.main:app --host 0.0.0.0 --port 8003
# CF_DOCUVISION_DEVICE=cuda uvicorn app.main:app ...
from __future__ import annotations
import logging
import os
import time
from contextlib import asynccontextmanager
from typing import Any
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from app.dolphin import DolphinParser, dolphin_to_cf_elements
# Configure root logging once at import time; the level comes from the
# LOG_LEVEL environment variable and defaults to "INFO" when it is unset.
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
# Module-level logger used by the lifespan hook and the endpoints below.
logger = logging.getLogger(__name__)
# ── Model lifecycle ───────────────────────────────────────────────────────────
# Process-wide parser instance. It is None until lifespan startup has finished
# loading the model, and is reset to None again on shutdown; both endpoints
# treat None as "not ready" and answer 503.
_parser: DolphinParser | None = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the Dolphin parser on startup and release it on shutdown.

    Everything before the ``yield`` runs before the application starts
    serving; the ``finally`` clause runs when the application stops.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. The loaded parser is published through the module-level
        ``_parser`` rather than through lifespan state.
    """
    global _parser
    logger.info("cf-docuvision: loading Dolphin-v2...")
    # NOTE(review): from_env() presumably reads the CF_DOCUVISION_* environment
    # variables and loads model weights synchronously -- confirm in app.dolphin.
    _parser = DolphinParser.from_env()
    logger.info("cf-docuvision: ready")
    try:
        yield
    finally:
        # Drop the reference even when an exception or cancellation is thrown
        # into the generator at the yield point; without try/finally the reset
        # would be skipped on that path.
        _parser = None
# ASGI application object -- the `app` in `uvicorn app.main:app`.
# The lifespan hook ties model loading/unloading to server startup/shutdown.
app = FastAPI(
    lifespan=lifespan,
    version="0.1.0",
    title="cf-docuvision",
    description="Dolphin-v2 document parsing service for CircuitForge products.",
)
# ── Request / Response schemas ────────────────────────────────────────────────
# Request body accepted by POST /extract.
class ExtractRequest(BaseModel):
    # Base64-encoded bytes of the document image to parse.
    image_b64: str
    # Extraction focus; the /extract handler accepts "auto", "table", "text"
    # or "form" and rejects any other value with HTTP 422.
    hint: str = "auto"
# Response body returned by POST /extract; per the module header it mirrors
# what DocuvisionClient._parse_response() in cf-core expects.
class ExtractResponse(BaseModel):
    # Elements produced by dolphin_to_cf_elements() from the parse result.
    elements: list[dict[str, Any]]
    # Tables produced by the same conversion step.
    tables: list[dict[str, Any]]
    # Raw text taken from the parse result.
    raw_text: str
    # Bookkeeping filled in by the handler: source, model, hint, elapsed_ms.
    metadata: dict[str, Any]
# ── Endpoints ─────────────────────────────────────────────────────────────────
@app.get("/health")
def health():
    """Health check. Returns 200 when the model is loaded and ready."""
    # Read the module-level parser once; it is None before lifespan startup
    # completes and after shutdown.
    loaded = _parser
    if loaded is not None:
        return {"status": "ok", "model": loaded._model_id}
    # No parser yet (or anymore): signal "service unavailable" to callers.
    raise HTTPException(status_code=503, detail="Model not loaded")
@app.post("/extract", response_model=ExtractResponse)
def extract(req: ExtractRequest):
    """
    Parse a document image into structured elements.
    Request body:
    image_b64 Base64-encoded image bytes (JPEG, PNG, TIFF, PDF page, etc.)
    hint Extraction focus: "auto" | "table" | "text" | "form"
    Response matches the DocuvisionClient._parse_response() contract in cf-core.
    """
    # Guard 1: the model must have been loaded by the lifespan hook.
    parser = _parser
    if parser is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Guard 2: only the documented hint values are accepted.
    allowed_hints = ("auto", "table", "text", "form")
    if req.hint not in allowed_hints:
        raise HTTPException(status_code=422, detail=f"Invalid hint {req.hint!r}")

    started = time.monotonic()
    try:
        parsed = parser.parse_b64(req.image_b64, hint=req.hint)
    except Exception as exc:
        # Any parser failure is logged with its traceback and reported to the
        # client as a 500 carrying the exception text.
        logger.exception("cf-docuvision: parse failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    # Convert the parser output into the element/table lists of the response.
    elements, tables = dolphin_to_cf_elements(parsed)

    # The timing covers both parsing and conversion, in whole milliseconds.
    elapsed_ms = round((time.monotonic() - started) * 1000)
    logger.info(
        "cf-docuvision: extracted %d elements, %d tables in %dms",
        len(elements), len(tables), elapsed_ms,
    )

    response_fields = {
        "elements": elements,
        "tables": tables,
        "raw_text": parsed.raw_text,
        "metadata": {
            "source": "cf-docuvision",
            "model": parsed.model,
            "hint": req.hint,
            "elapsed_ms": elapsed_ms,
        },
    }
    return ExtractResponse(**response_fields)