diff --git a/.env.example b/.env.example index 63140b1..bc393f0 100644 --- a/.env.example +++ b/.env.example @@ -98,14 +98,16 @@ CF_APP_NAME=snipe # OLLAMA_HOST=http://localhost:11434 # OLLAMA_MODEL=llava:7b -# CF Orchestrator — routes vision/LLM tasks to a cf-orch coordinator for VRAM management. +# GPU Server — routes vision/LLM tasks to a cf-orch coordinator for VRAM management. # Self-hosted: point at a local cf-orch coordinator if you have one running. # Cloud (internal): managed coordinator at orch.circuitforge.tech. # Leave unset to run vision tasks inline (no VRAM coordination). -# CF_ORCH_URL=http://10.1.10.71:7700 +# GPU_SERVER_URL=http://10.1.10.71:7700 +# +# CF_ORCH_URL is accepted as a backward-compat alias for GPU_SERVER_URL. # # cf-orch agent (compose --profile orch) — coordinator URL for the sidecar agent. -# Defaults to CF_ORCH_URL if unset. +# Defaults to GPU_SERVER_URL if unset. # CF_ORCH_COORDINATOR_URL=http://10.1.10.71:7700 # ── Community DB (optional) ────────────────────────────────────────────────── diff --git a/api/main.py b/api/main.py index 1710b8a..50aaa03 100644 --- a/api/main.py +++ b/api/main.py @@ -209,7 +209,7 @@ async def _lifespan(app: FastAPI): _category_cache.refresh(token_manager=None) # bootstrap fallback try: - cforch_url = os.getenv("CF_ORCH_URL") or None + cforch_url = os.getenv("GPU_SERVER_URL") or os.getenv("CF_ORCH_URL") or None if cforch_url: _query_translator = QueryTranslator( category_cache=_category_cache, diff --git a/app/llm/router.py b/app/llm/router.py index 802c3a6..353ec78 100644 --- a/app/llm/router.py +++ b/app/llm/router.py @@ -6,7 +6,7 @@ Snipe LLMRouter shim — tri-level config path priority. Config lookup order: 1. /config/llm.yaml — per-install local override 2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default) - 3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, CF_ORCH_URL) + 3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, GPU_SERVER_URL) """ from pathlib import Path diff --git a/app/tasks/runner.py b/app/tasks/runner.py index 5b6b1c2..ce59649 100644 --- a/app/tasks/runner.py +++ b/app/tasks/runner.py @@ -8,9 +8,9 @@ Current task types: result to trust_scores.photo_analysis_json (Paid tier). Image assessment routing: - Cloud (CF_ORCH_URL set): allocates via cf-orch task endpoint + Cloud (GPU_SERVER_URL set): allocates via cf-orch task endpoint product=snipe, task=image_assessment. - Local (no CF_ORCH_URL) or TaskNotFound fallback: uses LLMRouter + Local (no GPU_SERVER_URL) or TaskNotFound fallback: uses LLMRouter with a vision-capable local backend (moondream2, llava, etc.). """ from __future__ import annotations @@ -135,7 +135,7 @@ def _run_trust_photo_analysis( if listing_title: user_prompt = f"Assess this eBay listing image: {listing_title}" - cforch_url = os.getenv("CF_ORCH_URL") + cforch_url = os.getenv("GPU_SERVER_URL") or os.getenv("CF_ORCH_URL") if cforch_url: raw = _assess_via_orch(cforch_url, image_data_url, user_prompt) else: diff --git a/compose.cloud.yml b/compose.cloud.yml index af83ccf..6c28923 100644 --- a/compose.cloud.yml +++ b/compose.cloud.yml @@ -20,9 +20,9 @@ services: CLOUD_MODE: "true" CLOUD_DATA_ROOT: /devl/snipe-cloud-data # DIRECTUS_JWT_SECRET, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN — set in .env (never commit) - # CF_ORCH_URL routes LLM query builder through cf-orch for VRAM-aware scheduling. + # GPU_SERVER_URL routes LLM query builder through cf-orch for VRAM-aware scheduling. # Override in .env to use a different coordinator URL. - CF_ORCH_URL: "http://host.docker.internal:7700" + GPU_SERVER_URL: "http://host.docker.internal:7700" CF_APP_NAME: snipe extra_hosts: - "host.docker.internal:host-gateway" diff --git a/compose.override.yml b/compose.override.yml index 097e47f..ea04bba 100644 --- a/compose.override.yml +++ b/compose.override.yml @@ -18,8 +18,8 @@ services: environment: - RELOAD=true # Point the LLM/vision task scheduler at the local cf-orch coordinator. - # Only has effect when CF_ORCH_URL is set (uncomment in .env, or set inline). - # - CF_ORCH_URL=http://10.1.10.71:7700 + # Only has effect when GPU_SERVER_URL is set (uncomment in .env, or set inline). + # - GPU_SERVER_URL=http://10.1.10.71:7700 # cf-orch agent — routes trust_photo_analysis vision tasks to the GPU coordinator. # Only starts when you pass --profile orch: diff --git a/config/llm.cloud.yaml b/config/llm.cloud.yaml index 0d39774..d96e97f 100644 --- a/config/llm.cloud.yaml +++ b/config/llm.cloud.yaml @@ -6,7 +6,7 @@ # (claude_code, copilot) are intentionally excluded here. # # CF Orchestrator routes both ollama and vllm allocations for VRAM-aware -# scheduling. CF_ORCH_URL must be set in .env for allocations to resolve; +# scheduling. GPU_SERVER_URL must be set in .env for allocations to resolve; # if cf-orch is unreachable the backend falls back to its static base_url. # # Model choice for query builder: llama3.1:8b diff --git a/tests/test_tasks/test_runner.py b/tests/test_tasks/test_runner.py index c593870..2d50ecc 100644 --- a/tests/test_tasks/test_runner.py +++ b/tests/test_tasks/test_runner.py @@ -173,7 +173,7 @@ def _make_orch_client_mock(vision_json: str) -> MagicMock: def test_run_task_photo_analysis_orch_success(tmp_db: Path): - """Cloud path: CFOrchClient.task_allocate is used when CF_ORCH_URL is set.""" + """Cloud path: CFOrchClient.task_allocate is used when GPU_SERVER_URL is set.""" task_id, _ = insert_task(tmp_db, "trust_photo_analysis", job_id=1, params=_PARAMS) chat_resp = MagicMock() @@ -181,7 +181,7 @@ def test_run_task_photo_analysis_orch_success(tmp_db: Path): chat_resp.raise_for_status = MagicMock() with patch("app.tasks.runner.requests") as mock_req, \ - patch.dict("os.environ", {"CF_ORCH_URL": "http://cf-orch.local:8700"}), \ + patch.dict("os.environ", {"GPU_SERVER_URL": "http://cf-orch.local:8700"}), \ patch("app.tasks.runner.httpx") as mock_httpx, \ patch("circuitforge_orch.client.CFOrchClient") as MockClient: @@ -216,7 +216,7 @@ def test_run_task_photo_analysis_orch_uses_image_assessment_task(tmp_db: Path): chat_resp.raise_for_status = MagicMock() with patch("app.tasks.runner.requests") as mock_req, \ - patch.dict("os.environ", {"CF_ORCH_URL": "http://cf-orch.local:8700"}), \ + patch.dict("os.environ", {"GPU_SERVER_URL": "http://cf-orch.local:8700"}), \ patch("app.tasks.runner.httpx") as mock_httpx, \ patch("circuitforge_orch.client.CFOrchClient") as MockClient: @@ -248,7 +248,7 @@ def test_run_task_photo_analysis_orch_sends_image_url_content(tmp_db: Path): return resp with patch("app.tasks.runner.requests") as mock_req, \ - patch.dict("os.environ", {"CF_ORCH_URL": "http://cf-orch.local:8700"}), \ + patch.dict("os.environ", {"GPU_SERVER_URL": "http://cf-orch.local:8700"}), \ patch("app.tasks.runner.httpx") as mock_httpx, \ patch("circuitforge_orch.client.CFOrchClient") as MockClient: @@ -282,7 +282,7 @@ def test_run_task_photo_analysis_orch_task_not_found_falls_back(tmp_db: Path): client_instance.task_allocate.return_value = cm with patch("app.tasks.runner.requests") as mock_req, \ - patch.dict("os.environ", {"CF_ORCH_URL": "http://cf-orch.local:8700"}), \ + patch.dict("os.environ", {"GPU_SERVER_URL": "http://cf-orch.local:8700"}), \ patch("circuitforge_orch.client.CFOrchClient", return_value=client_instance), \ patch("app.tasks.runner._assess_via_local_llm", return_value=_VISION_JSON) as mock_local: