# sparrow/app/api/endpoints/gpu.py

# app/api/endpoints/gpu.py — cf-orch GPU status and session allocation
#
# GET /api/gpu/status — available capacity from cf-orch
# POST /api/gpu/connect — session-held allocation (Premium tier, stub)
# DELETE /api/gpu/disconnect — release session allocation (Premium tier, stub)
from __future__ import annotations

import logging
import os

import httpx
from fastapi import APIRouter, HTTPException

# Base URL of cf-orch, read once at import time. Any trailing "/" is stripped
# so endpoint paths can be appended directly; an unset variable yields "".
_ORCH_URL = os.getenv("CF_ORCH_URL", "").rstrip("/")
# Service name interpolated into cf-orch request paths.
_SERVICE = "cf-musicgen"

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/gpu", tags=["gpu"])


@router.get("/status")
async def gpu_status() -> dict:
    """
    Return current cf-orch capacity for cf-musicgen.

    When CF_ORCH_URL is not configured, no request is made and a mock-mode
    payload ({"available": False, "reason": "...", "mock": True}) is
    returned. Otherwise the parsed JSON body of cf-orch's service status
    endpoint is returned unchanged by this function.

    Raises:
        HTTPException: 502 if cf-orch answers with an error status, cannot
            be reached, or sends a body that is not valid JSON.
    """
    if not _ORCH_URL:
        return {
            "available": False,
            "reason": "CF_ORCH_URL not configured — running in mock mode.",
            "mock": True,
        }

    status_url = f"{_ORCH_URL}/api/services/{_SERVICE}/status"
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(status_url)
            resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        logger.warning(
            "cf-orch status check returned HTTP %s",
            exc.response.status_code,
        )
        raise HTTPException(
            status_code=502,
            detail=f"cf-orch returned {exc.response.status_code}.",
        ) from exc
    except Exception as exc:
        # Endpoint boundary: any other failure while talking to cf-orch
        # (connection error, timeout, bad URL, ...) is surfaced as a 502
        # instead of an unhandled 500.
        logger.warning("cf-orch status check failed: %s", exc)
        raise HTTPException(
            status_code=502,
            detail=f"cf-orch unreachable: {exc}",
        ) from exc

    # Parse separately so a malformed body is not misreported as
    # "unreachable": cf-orch did answer, it just did not send valid JSON.
    try:
        return resp.json()
    except ValueError as exc:
        logger.warning("cf-orch status response was not valid JSON: %s", exc)
        raise HTTPException(
            status_code=502,
            detail="cf-orch returned an invalid JSON response.",
        ) from exc


@router.post("/connect", status_code=501)
async def gpu_connect() -> dict:
    """
    Acquire a session-held GPU allocation (Premium tier, stub).

    Always fails with HTTP 501: the feature is not implemented yet and is
    tracked in cf-orch #43.
    """
    not_implemented = HTTPException(
        status_code=501,
        detail="Session-held GPU allocation is not yet implemented (cf-orch #43).",
    )
    raise not_implemented


@router.delete("/disconnect", status_code=501)
async def gpu_disconnect() -> dict:
    """
    Release a session-held GPU allocation (Premium tier, stub).

    Always fails with HTTP 501 until session-held allocation is implemented.
    """
    not_implemented = HTTPException(
        status_code=501,
        detail="Session-held GPU allocation is not yet implemented (cf-orch #43).",
    )
    raise not_implemented