FastAPI microservice wrapping ByteDance/Dolphin-v2 (Qwen2.5-VL-3B base) for structured document extraction. Exposes POST /extract and GET /health. Maps Dolphin's 21 element types to cf-core's 7-type canonical schema. Services: cf-text /extract, /health. Env vars: CF_DOCUVISION_MODEL, CF_DOCUVISION_DEVICE, CF_DOCUVISION_PORT. GPU: 8GB+ VRAM required for Dolphin-v2; CPU fallback available but very slow.
26 lines
787 B
YAML
26 lines
787 B
YAML
---
# Docker Compose definition for the cf-docuvision service.
# Builds the image from this directory and runs it on the host network with
# one NVIDIA GPU reserved.
services:
  cf-docuvision:
    build: .
    # Host networking: no port mapping is declared; the healthcheck below
    # reaches the service on localhost:8003.
    network_mode: host
    env_file: .env
    environment:
      # Quoted so the value stays a string. Values set here take precedence
      # over the same variable in .env (Compose precedence rule).
      # NOTE(review): the healthcheck URL hard-codes 8003 — keep both in sync.
      CF_DOCUVISION_PORT: "8003"
    volumes:
      # Cache HuggingFace model weights across rebuilds
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # Optional: mount a local model path to skip HF download
      # - /Library/Assets/LLM/dolphin-v2:/models/dolphin-v2:ro
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a single NVIDIA GPU for the container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Probes GET /health with the Python standard library only; urlopen
      # raises on connection failure or HTTP error, giving a non-zero exit.
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8003/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period before failed probes count against `retries`
      # (presumably sized for model loading — confirm).
      start_period: 120s