# app/api/endpoints/gpu.py — cf-orch GPU status and session allocation # # GET /api/gpu/status — available capacity from cf-orch # POST /api/gpu/connect — session-held allocation (Premium tier, stub) # DELETE /api/gpu/disconnect — release session allocation (Premium tier, stub) from __future__ import annotations import logging import os import httpx from fastapi import APIRouter, HTTPException logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/gpu", tags=["gpu"]) _ORCH_URL = os.environ.get("CF_ORCH_URL", "").rstrip("/") _SERVICE = "cf-musicgen" @router.get("/status") async def gpu_status() -> dict: """ Return current cf-orch capacity for cf-musicgen. Returns {"available": False, "reason": "..."} if cf-orch is unreachable or unconfigured (e.g. mock mode). """ if not _ORCH_URL: return { "available": False, "reason": "CF_ORCH_URL not configured — running in mock mode.", "mock": True, } try: async with httpx.AsyncClient(timeout=5.0) as client: resp = await client.get( f"{_ORCH_URL}/api/services/{_SERVICE}/status" ) resp.raise_for_status() return resp.json() except httpx.HTTPStatusError as exc: raise HTTPException( status_code=502, detail=f"cf-orch returned {exc.response.status_code}.", ) from exc except Exception as exc: raise HTTPException( status_code=502, detail=f"cf-orch unreachable: {exc}", ) from exc @router.post("/connect", status_code=501) async def gpu_connect() -> dict: """ Session-held GPU allocation (Premium tier). Not yet implemented — tracked in cf-orch #43. """ raise HTTPException( status_code=501, detail="Session-held GPU allocation is not yet implemented (cf-orch #43).", ) @router.delete("/disconnect", status_code=501) async def gpu_disconnect() -> dict: """Release a session-held GPU allocation (Premium tier, stub).""" raise HTTPException( status_code=501, detail="Session-held GPU allocation is not yet implemented (cf-orch #43).", )