diff --git a/.env.example b/.env.example index b9a7ade..1fe6671 100644 --- a/.env.example +++ b/.env.example @@ -18,6 +18,8 @@ DATA_DIR=./data # CF-core resource coordinator (VRAM lease management) # Set to the coordinator URL when running alongside cf-core orchestration # COORDINATOR_URL=http://localhost:7700 +# IP address this machine advertises to the coordinator (must be reachable from the coordinator host) +# CF_ORCH_ADVERTISE_HOST=10.1.10.71 # Processing USE_GPU=true diff --git a/app/tasks/scheduler.py b/app/tasks/scheduler.py index b916852..64bd268 100644 --- a/app/tasks/scheduler.py +++ b/app/tasks/scheduler.py @@ -10,6 +10,7 @@ from circuitforge_core.tasks.scheduler import ( reset_scheduler, # re-export for tests ) +from app.core.config import settings from app.tasks.runner import LLM_TASK_TYPES, VRAM_BUDGETS, run_task @@ -20,4 +21,6 @@ def get_scheduler(db_path: Path) -> TaskScheduler: run_task_fn=run_task, task_types=LLM_TASK_TYPES, vram_budgets=VRAM_BUDGETS, + coordinator_url=settings.COORDINATOR_URL, + service_name="kiwi", ) diff --git a/compose.override.yml b/compose.override.yml new file mode 100644 index 0000000..c82728d --- /dev/null +++ b/compose.override.yml @@ -0,0 +1,24 @@ +# compose.override.yml — local dev additions (auto-merged by docker compose) +# Not used in cloud or demo stacks (those use compose.cloud.yml / compose.demo.yml directly). + +services: + # cf-orch agent sidecar: registers kiwi as a GPU node with the coordinator. + # The API scheduler uses COORDINATOR_URL to lease VRAM cooperatively; this + # agent makes kiwi's VRAM usage visible on the orchestrator dashboard. 
+ cf-orch-agent: + image: kiwi-api # reuse the local api image — cf-core is already installed there + network_mode: host + env_file: .env + environment: + # Override the coordinator URL here or via .env + COORDINATOR_URL: ${COORDINATOR_URL:-http://10.1.10.71:7700} + command: > + conda run -n kiwi cf-orch agent + --coordinator ${COORDINATOR_URL:-http://10.1.10.71:7700} + --node-id kiwi + --host 0.0.0.0 + --port 7702 + --advertise-host ${CF_ORCH_ADVERTISE_HOST:-10.1.10.71} + restart: unless-stopped + depends_on: + - api diff --git a/environment.yml b/environment.yml index de0e65b..3dc759b 100644 --- a/environment.yml +++ b/environment.yml @@ -14,6 +14,7 @@ dependencies: - numpy>=1.25 - pyzbar>=0.1.9 - httpx>=0.27 + - psutil>=5.9 - pydantic>=2.5 - PyJWT>=2.8 - datasets