feat(resources): add cf-orch CLI with start, agent, status, install-service commands
This commit is contained in:
parent
dba49a47fe
commit
70017abd35
2 changed files with 163 additions and 0 deletions
130
circuitforge_core/resources/cli.py
Normal file
130
circuitforge_core/resources/cli.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import typer
|
||||
import uvicorn
|
||||
|
||||
app = typer.Typer(name="cf-orch", help="CircuitForge GPU resource orchestrator")
|
||||
|
||||
_SYSTEMD_UNIT_PATH = Path("/etc/systemd/system/cf-orch.service")
|
||||
|
||||
_SYSTEMD_UNIT_TEMPLATE = """\
|
||||
[Unit]
|
||||
Description=CircuitForge GPU Resource Orchestrator
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart={python} -m circuitforge_core.resources.cli start
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
"""
|
||||
|
||||
|
||||
@app.command()
|
||||
def start(
|
||||
profile: Annotated[Optional[Path], typer.Option(help="Profile YAML path")] = None,
|
||||
host: str = "0.0.0.0",
|
||||
port: int = 7700,
|
||||
agent_port: int = 7701,
|
||||
) -> None:
|
||||
"""Start the cf-orch coordinator (auto-detects GPU profile if not specified)."""
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
||||
from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
|
||||
|
||||
lease_manager = LeaseManager()
|
||||
profile_registry = ProfileRegistry()
|
||||
supervisor = AgentSupervisor(lease_manager=lease_manager)
|
||||
|
||||
monitor = GpuMonitor()
|
||||
gpus = monitor.poll()
|
||||
if not gpus:
|
||||
typer.echo("Warning: no GPUs detected via nvidia-smi — coordinator running with 0 VRAM")
|
||||
else:
|
||||
for gpu in gpus:
|
||||
lease_manager.register_gpu("local", gpu.gpu_id, gpu.vram_total_mb)
|
||||
typer.echo(f"Detected {len(gpus)} GPU(s)")
|
||||
|
||||
if profile:
|
||||
active_profile = profile_registry.load(profile)
|
||||
typer.echo(f"Using profile: {active_profile.name} (from {profile})")
|
||||
else:
|
||||
active_profile = profile_registry.auto_detect(gpus) if gpus else profile_registry.list_public()[-1]
|
||||
typer.echo(f"Auto-selected profile: {active_profile.name}")
|
||||
|
||||
coordinator_app = create_coordinator_app(
|
||||
lease_manager=lease_manager,
|
||||
profile_registry=profile_registry,
|
||||
agent_supervisor=supervisor,
|
||||
)
|
||||
|
||||
typer.echo(f"Starting cf-orch coordinator on {host}:{port}")
|
||||
uvicorn.run(coordinator_app, host=host, port=port)
|
||||
|
||||
|
||||
@app.command()
|
||||
def agent(
|
||||
coordinator: str = "http://localhost:7700",
|
||||
node_id: str = "local",
|
||||
host: str = "0.0.0.0",
|
||||
port: int = 7701,
|
||||
) -> None:
|
||||
"""Start a cf-orch node agent (for remote nodes like Navi, Huginn)."""
|
||||
from circuitforge_core.resources.agent.app import create_agent_app
|
||||
|
||||
agent_app = create_agent_app(node_id=node_id)
|
||||
typer.echo(f"Starting cf-orch agent [{node_id}] on {host}:{port}")
|
||||
uvicorn.run(agent_app, host=host, port=port)
|
||||
|
||||
|
||||
@app.command()
|
||||
def status(coordinator: str = "http://localhost:7700") -> None:
|
||||
"""Show GPU and lease status from the coordinator."""
|
||||
import httpx
|
||||
try:
|
||||
resp = httpx.get(f"{coordinator}/api/nodes", timeout=5.0)
|
||||
resp.raise_for_status()
|
||||
nodes = resp.json().get("nodes", [])
|
||||
for node in nodes:
|
||||
typer.echo(f"\nNode: {node['node_id']}")
|
||||
for gpu in node.get("gpus", []):
|
||||
typer.echo(
|
||||
f" GPU {gpu['gpu_id']}: {gpu['name']} — "
|
||||
f"{gpu['vram_used_mb']}/{gpu['vram_total_mb']} MB used"
|
||||
)
|
||||
except Exception as exc:
|
||||
typer.echo(f"Coordinator unreachable at {coordinator}: {exc}", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
@app.command("install-service")
|
||||
def install_service(
|
||||
dry_run: bool = typer.Option(False, "--dry-run", help="Print unit file without writing"),
|
||||
) -> None:
|
||||
"""Write a systemd unit file for cf-orch (requires root)."""
|
||||
python = sys.executable
|
||||
unit_content = _SYSTEMD_UNIT_TEMPLATE.format(python=python)
|
||||
if dry_run:
|
||||
typer.echo(f"Would write to {_SYSTEMD_UNIT_PATH}:\n")
|
||||
typer.echo(unit_content)
|
||||
return
|
||||
try:
|
||||
_SYSTEMD_UNIT_PATH.write_text(unit_content)
|
||||
typer.echo(f"Written: {_SYSTEMD_UNIT_PATH}")
|
||||
typer.echo("Run: sudo systemctl daemon-reload && sudo systemctl enable --now cf-orch")
|
||||
except PermissionError:
|
||||
typer.echo(f"Permission denied writing to {_SYSTEMD_UNIT_PATH}. Run as root.", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
33
tests/test_resources/test_cli.py
Normal file
33
tests/test_resources/test_cli.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from circuitforge_core.resources.cli import app
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
|
||||
def test_cli_help():
|
||||
result = runner.invoke(app, ["--help"])
|
||||
assert result.exit_code == 0
|
||||
assert "cf-orch" in result.output.lower() or "Usage" in result.output
|
||||
|
||||
|
||||
def test_status_command_shows_no_coordinator_message():
|
||||
with patch("httpx.get", side_effect=ConnectionRefusedError("refused")):
|
||||
result = runner.invoke(app, ["status"])
|
||||
assert result.exit_code != 0 or "unreachable" in result.output.lower() \
|
||||
or "coordinator" in result.output.lower()
|
||||
|
||||
|
||||
def test_install_service_creates_systemd_unit(tmp_path: Path):
|
||||
unit_path = tmp_path / "cf-orch.service"
|
||||
with patch(
|
||||
"circuitforge_core.resources.cli._SYSTEMD_UNIT_PATH", unit_path
|
||||
):
|
||||
result = runner.invoke(app, ["install-service", "--dry-run"])
|
||||
assert result.exit_code == 0
|
||||
assert "cf-orch.service" in result.output or "systemd" in result.output.lower()
|
||||
Loading…
Reference in a new issue