fix: CPU as default inference profile, remote last in list

- Reorder PROFILES in step_hardware.py, _WIZARD_PROFILES in dev-api.py, and <option> elements in WizardHardwareStep.vue: cpu → single-gpu → dual-gpu → cf-orch → remote
- _suggest_profile() now defaults to "cpu" instead of "remote" when no local GPUs detected
- Update no-GPU hint text to remove "Remote" from suggested options
- Add nvidia GPU device reservation to compose.wizard-test.yml so the wizard test instance can run nvidia-smi and detect host GPUs
- Switch wizard-test compose to use ghcr.io/circuitforgellc/peregrine:latest (same image as main compose, avoids stale peregrine-api tag drift)
2026-06-15 09:11:14 -07:00 · 2026-06-15 09:11:14 -07:00 · f799aff4e0
commit f799aff4e0
parent 7e361aa6d1
4 changed files with 68 additions and 6 deletions
--- a/app/wizard/step_hardware.py
+++ b/app/wizard/step_hardware.py
@ -1,6 +1,6 @@
 """Step 1 — Hardware detection and inference profile selection."""
-PROFILES = ["remote", "cpu", "single-gpu", "dual-gpu"]
+PROFILES = ["cpu", "single-gpu", "dual-gpu", "cf-orch", "remote"]
 def validate(data: dict) -> list[str]:
--- a/compose.wizard-test.yml
+++ b/compose.wizard-test.yml
@ -0,0 +1,62 @@
 # compose.wizard-test.yml — Fresh first-run instance for testing wizard/onboarding flows
 #
 # Spins up on port 8507 with ephemeral storage so every `docker compose restart`
 # gives a completely clean slate. Perfect for exercising the onboarding wizard,
 # AI interview, and first-run UX without touching the real data.
 #
 # Usage:
 #   docker compose -f compose.wizard-test.yml --project-name peregrine-wizard up -d
 #   docker compose -f compose.wizard-test.yml --project-name peregrine-wizard restart api
 #   docker compose -f compose.wizard-test.yml --project-name peregrine-wizard down
 services:
  api:
    image: ghcr.io/circuitforgellc/peregrine:latest   # same image as main compose
    command: >
      bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
    volumes:
      - ./config/wizard-test:/app/config   # LLM config only — no user.yaml triggers wizard
    tmpfs:
      - /app/data                           # ephemeral DB; wipes on restart → clean first-run every time
    environment:
      - STAGING_DB=/app/data/staging.db
      - DOCS_DIR=/tmp/wizard-test-docs
      - PYTHONUNBUFFERED=1
      - CF_ORCH_URL=http://host.docker.internal:7700
      - CF_APP_NAME=peregrine
      - GPU_SERVER_URL=http://host.docker.internal:7700
      - HEIMDALL_URL=http://host.docker.internal:8000    # license check — skip for local testing
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # Reserve all host NVIDIA GPUs for this container so the wizard test
    # instance can run nvidia-smi and detect host GPUs during hardware detection.
    # NOTE(review): presumably requires the NVIDIA Container Toolkit on the
    # host — confirm before running on a GPU-less machine.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    depends_on:
      searxng:
        condition: service_healthy
    restart: unless-stopped
    # No host port — nginx in web proxies /api/ → api:8601
  web:
    image: ghcr.io/circuitforgellc/peregrine-web:latest   # same image as main compose
    ports:
      - "8507:80"
    depends_on:
      - api
    restart: unless-stopped
  searxng:
    image: searxng/searxng:latest
    volumes:
      - ./docker/searxng:/etc/searxng:ro
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/"]
      interval: 10s
      timeout: 5s
      retries: 3
    restart: unless-stopped
--- a/dev-api.py
+++ b/dev-api.py
@ -4280,7 +4280,7 @@ def export_classifier():
 # State is persisted to user.yaml on every step so the wizard can resume
 # after a browser refresh or crash (mirrors the Streamlit wizard behaviour).
-_WIZARD_PROFILES = ("remote", "cpu", "single-gpu", "dual-gpu", "cf-orch")
+_WIZARD_PROFILES = ("cpu", "single-gpu", "dual-gpu", "cf-orch", "remote")
 _WIZARD_TIERS = ("free", "paid", "premium")
@ -4326,7 +4326,7 @@ def _suggest_profile(gpus: list[str]) -> str:
        return "dual-gpu"
    if len(gpus) == 1:
        return "single-gpu"
-    return "remote"
+    return "cpu"
@app.get("/api/wizard/status")
--- a/web/src/views/wizard/WizardHardwareStep.vue
+++ b/web/src/views/wizard/WizardHardwareStep.vue
@ -13,13 +13,12 @@
        {{ wizard.hardware.gpus.join(', ') }}
      </div>
      <div v-else class="step__info">
-        No local NVIDIA GPUs detected. "Remote", "CPU", or "cf-orch" mode recommended.
+        No local NVIDIA GPUs detected. CPU or cf-orch mode recommended.
      </div>
      <div class="step__field">
        <label class="step__label" for="hw-profile">Inference profile</label>
        <select id="hw-profile" v-model="selectedProfile" class="step__select">
-          <option value="remote">Remote — use cloud API keys</option>
          <option value="cpu">CPU — local Ollama, no GPU</option>
          <option value="single-gpu">Single GPU — local Ollama + one GPU</option>
          <option value="dual-gpu">Dual GPU — local Ollama + two GPUs</option>
@ -27,6 +26,7 @@
            cf-orch — CircuitForge GPU cluster
            {{ orchAvailable ? `(${orchGpus.length} GPU(s) available)` : '(configure endpoint below)' }}
          </option>
+          <option value="remote">Remote — use cloud API keys</option>
        </select>
      </div>
@ -74,7 +74,7 @@
        v-else-if="selectedProfile !== 'remote' && !wizard.hardware.gpus.length"
        class="step__warning"
      >
-        ⚠️ No local GPUs detected — a GPU profile may not work. Choose CPU, Remote,
+        ⚠️ No local GPUs detected — a GPU profile may not work. Choose CPU
        or cf-orch if you have access to the cluster.
      </div>
    </template>