# cf-docuvision/app/main.py

# app/main.py — cf-docuvision FastAPI service
#
# Exposes POST /extract and GET /health.
# Response schema matches DocuvisionClient._parse_response() in cf-core.
#
# Start:
#   uvicorn app.main:app --host 0.0.0.0 --port 8003
#   CF_DOCUVISION_DEVICE=cuda uvicorn app.main:app ...
from __future__ import annotations

import logging
import os
import time
from contextlib import asynccontextmanager
from typing import Any

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from app.dolphin import DolphinParser, dolphin_to_cf_elements

logger = logging.getLogger(__name__)
# LOG_LEVEL is read once, at import time. The logging module only recognises
# upper-case level names, so normalise the value: a lower-case setting such as
# LOG_LEVEL=debug can otherwise make basicConfig() raise ValueError and stop
# the service from starting.
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO").upper())

# ── Model lifecycle ───────────────────────────────────────────────────────────

# Process-wide parser instance. It is None until lifespan() has loaded the
# model and is reset to None on shutdown; the endpoints answer 503 while it
# is None.
_parser: DolphinParser | None = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Keep the Dolphin parser loaded for the lifetime of the application.

    Passed to ``FastAPI(lifespan=...)``: the statements before ``yield`` run
    during application startup, the cleanup after it runs on shutdown.

    Args:
        app: The FastAPI application. Unused here; it is part of the
            lifespan callable's signature.

    Yields:
        None. The parser built by ``DolphinParser.from_env()`` is published
        through the module-level ``_parser`` global, not the yielded value.
    """
    global _parser
    logger.info("cf-docuvision: loading Dolphin-v2...")
    _parser = DolphinParser.from_env()
    logger.info("cf-docuvision: ready")
    try:
        yield
    finally:
        # Drop the reference even when an exception or cancellation is thrown
        # in at the yield point; without the finally the reset would be
        # skipped and the endpoints would keep reporting a loaded model.
        _parser = None


# ASGI application object; the module header starts it with
# `uvicorn app.main:app`. Model loading and teardown are delegated to
# lifespan().
app = FastAPI(
    title="cf-docuvision",
    description="Dolphin-v2 document parsing service for CircuitForge products.",
    version="0.1.0",
    lifespan=lifespan,
)


# ── Request / Response schemas ────────────────────────────────────────────────

class ExtractRequest(BaseModel):
    """Request body for POST /extract."""

    # Base64-encoded bytes of the document image to parse.
    image_b64: str
    # Extraction focus. The /extract handler accepts "auto", "table", "text"
    # or "form" and rejects any other value with HTTP 422.
    hint: str = "auto"


class ExtractResponse(BaseModel):
    """Response body for POST /extract."""

    # The two lists returned by dolphin_to_cf_elements() for the parse result.
    elements: list[dict[str, Any]]
    tables: list[dict[str, Any]]
    # Copied from the parser result's raw_text attribute.
    raw_text: str
    # Filled in by the /extract handler with the keys "source", "model",
    # "hint" and "elapsed_ms".
    metadata: dict[str, Any]


# ── Endpoints ─────────────────────────────────────────────────────────────────

@app.get("/health")
def health():
    """Report readiness: 200 with the model id once loaded, otherwise 503."""
    if _parser is not None:
        return {"status": "ok", "model": _parser._model_id}
    raise HTTPException(status_code=503, detail="Model not loaded")


@app.post("/extract", response_model=ExtractResponse)
def extract(req: ExtractRequest):
    """
    Turn one base64-encoded document image into structured elements.

    Request body:
        image_b64   Base64-encoded image bytes (JPEG, PNG, TIFF, PDF page, etc.)
        hint        Extraction focus: "auto" | "table" | "text" | "form"

    Errors:
        503   no parser is loaded
        422   hint is not one of the accepted values
        500   the parser raised; the exception text is returned as the detail

    The response follows the DocuvisionClient._parse_response() contract in
    cf-core.
    """
    # Readiness is checked before the request is validated.
    if _parser is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    accepted_hints = ("auto", "table", "text", "form")
    if req.hint not in accepted_hints:
        raise HTTPException(status_code=422, detail=f"Invalid hint {req.hint!r}")

    # The timed span covers both parsing and conversion to cf elements.
    started = time.monotonic()
    try:
        result = _parser.parse_b64(req.image_b64, hint=req.hint)
    except Exception as exc:
        logger.exception("cf-docuvision: parse failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    cf_elements, cf_tables = dolphin_to_cf_elements(result)
    elapsed_ms = round(1000 * (time.monotonic() - started))
    logger.info(
        "cf-docuvision: extracted %d elements, %d tables in %dms",
        len(cf_elements),
        len(cf_tables),
        elapsed_ms,
    )

    payload = {
        "elements": cf_elements,
        "tables": cf_tables,
        "raw_text": result.raw_text,
        "metadata": {
            "source": "cf-docuvision",
            "model": result.model,
            "hint": req.hint,
            "elapsed_ms": elapsed_ms,
        },
    }
    return ExtractResponse(**payload)