fix: CPU as default inference profile, remote last in list

- Reorder PROFILES in step_hardware.py, _WIZARD_PROFILES in dev-api.py, and
  <option> elements in WizardHardwareStep.vue:
  cpu → single-gpu → dual-gpu → cf-orch → remote
  (PROFILES in step_hardware.py also gains the previously missing "cf-orch" entry)
- _suggest_profile() now defaults to "cpu" instead of "remote" when no local
  GPUs are detected
- Update no-GPU hint text to remove "Remote" from suggested options
- Add nvidia GPU device reservation to compose.wizard-test.yml so the wizard
  test instance can run nvidia-smi and detect host GPUs
- Switch wizard-test compose to use ghcr.io/circuitforgellc/peregrine:latest
  (same image as main compose, avoids stale peregrine-api tag drift)
2026-06-15 09:11:14 -07:00 · 2026-06-15 09:11:14 -07:00 · f799aff4e0
commit f799aff4e0
parent 7e361aa6d1
4 changed files with 68 additions and 6 deletions
--- a/app/wizard/step_hardware.py
+++ b/app/wizard/step_hardware.py
@ -1,6 +1,6 @@
 """Step 1 — Hardware detection and inference profile selection."""

-PROFILES = ["remote", "cpu", "single-gpu", "dual-gpu"]
+PROFILES = ["cpu", "single-gpu", "dual-gpu", "cf-orch", "remote"]


 def validate(data: dict) -> list[str]:
--- a/compose.wizard-test.yml
+++ b/compose.wizard-test.yml
@ -0,0 +1,62 @@
+# compose.wizard-test.yml — Fresh first-run instance for testing wizard/onboarding flows
+#
+# Spins up on port 8507 with ephemeral storage so every `docker compose restart`
+# gives a completely clean slate. Perfect for exercising the onboarding wizard,
+# AI interview, and first-run UX without touching the real data.
+#
+# Usage:
+#   docker compose -f compose.wizard-test.yml --project-name peregrine-wizard up -d
+#   docker compose -f compose.wizard-test.yml --project-name peregrine-wizard restart api
+#   docker compose -f compose.wizard-test.yml --project-name peregrine-wizard down
+
+services:
+
+  api:
+    image: ghcr.io/circuitforgellc/peregrine:latest   # same image as main compose
+    command: >
+      bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
+    volumes:
+      - ./config/wizard-test:/app/config   # LLM config only — the absence of user.yaml is what triggers the wizard
+    tmpfs:
+      - /app/data                           # ephemeral DB; wipes on restart → clean first-run every time
+    environment:
+      - STAGING_DB=/app/data/staging.db
+      - DOCS_DIR=/tmp/wizard-test-docs
+      - PYTHONUNBUFFERED=1
+      - CF_ORCH_URL=http://host.docker.internal:7700
+      - CF_APP_NAME=peregrine
+      - GPU_SERVER_URL=http://host.docker.internal:7700
+      - HEIMDALL_URL=http://host.docker.internal:8000    # license check — skip for local testing
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    depends_on:
+      searxng:
+        condition: service_healthy
+    restart: unless-stopped
+    # No host port — nginx in web proxies /api/ → api:8601
+
+  web:
+    image: ghcr.io/circuitforgellc/peregrine-web:latest   # same image as main compose
+    ports:
+      - "8507:80"
+    depends_on:
+      - api
+    restart: unless-stopped
+
+  searxng:
+    image: searxng/searxng:latest
+    volumes:
+      - ./docker/searxng:/etc/searxng:ro
+    healthcheck:
+      test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+    restart: unless-stopped
--- a/dev-api.py
+++ b/dev-api.py
@ -4280,7 +4280,7 @@ def export_classifier():
 # State is persisted to user.yaml on every step so the wizard can resume
 # after a browser refresh or crash (mirrors the Streamlit wizard behaviour).

-_WIZARD_PROFILES = ("remote", "cpu", "single-gpu", "dual-gpu", "cf-orch")
+_WIZARD_PROFILES = ("cpu", "single-gpu", "dual-gpu", "cf-orch", "remote")
 _WIZARD_TIERS = ("free", "paid", "premium")


@ -4326,7 +4326,7 @@ def _suggest_profile(gpus: list[str]) -> str:
        return "dual-gpu"
    if len(gpus) == 1:
        return "single-gpu"
-    return "remote"
+    return "cpu"


@app.get("/api/wizard/status")
--- a/web/src/views/wizard/WizardHardwareStep.vue
+++ b/web/src/views/wizard/WizardHardwareStep.vue
@ -13,13 +13,12 @@
        {{ wizard.hardware.gpus.join(', ') }}
      </div>
      <div v-else class="step__info">
-        No local NVIDIA GPUs detected. "Remote", "CPU", or "cf-orch" mode recommended.
+        No local NVIDIA GPUs detected. CPU or cf-orch mode recommended.
      </div>

      <div class="step__field">
        <label class="step__label" for="hw-profile">Inference profile</label>
        <select id="hw-profile" v-model="selectedProfile" class="step__select">
-          <option value="remote">Remote — use cloud API keys</option>
          <option value="cpu">CPU — local Ollama, no GPU</option>
          <option value="single-gpu">Single GPU — local Ollama + one GPU</option>
          <option value="dual-gpu">Dual GPU — local Ollama + two GPUs</option>
@ -27,6 +26,7 @@
            cf-orch — CircuitForge GPU cluster
            {{ orchAvailable ? `(${orchGpus.length} GPU(s) available)` : '(configure endpoint below)' }}
          </option>
+          <option value="remote">Remote — use cloud API keys</option>
        </select>
      </div>

@ -74,7 +74,7 @@
        v-else-if="selectedProfile !== 'remote' && !wizard.hardware.gpus.length"
        class="step__warning"
      >
-        ⚠️ No local GPUs detected — a GPU profile may not work. Choose CPU, Remote,
+        ⚠️ No local GPUs detected — a GPU profile may not work. Choose CPU
        or cf-orch if you have access to the cluster.
      </div>
    </template>