Compare commits

..

2 commits

Author SHA1 Message Date
118ae2660a feat: Imitate tab — pull CF product samples, compare LLM responses
Backend (app/imitate.py):
- GET /api/imitate/products — reads imitate: config, checks online status
- GET /api/imitate/products/{id}/sample — fetches real item from product API
- GET /api/imitate/run (SSE) — streams ollama responses for selected models
- POST /api/imitate/push-corrections — queues results in SFT corrections JSONL

Frontend (ImitateView.vue):
- Step 1: product picker grid (online/offline status, icon from config)
- Step 2: raw sample preview + editable prompt textarea
- Step 3: ollama model multi-select, temperature slider, SSE run with live log
- Step 4: response cards side by side, push to Corrections button

Wiring:
- app/api.py: include imitate_router at /api/imitate
- web/src/router: /imitate route + lazy import
- AppSidebar: Imitate nav entry (mirror icon)
- config/label_tool.yaml.example: imitate: section with peregrine example
- 16 unit tests (100% passing)

Also: BenchmarkView.vue Compare panel — side-by-side run diff for bench results
2026-04-09 20:04:45 -07:00
dc2dc70ef9 test: fix test_tasks_parses_yaml for TaskEntry schema
TaskEntry now includes prompt/system fields (default ""). Switch from
exact dict comparison to field-by-field assertions so the test is
forward-compatible with optional schema additions.
2026-04-09 20:02:12 -07:00
8 changed files with 24 additions and 266 deletions

View file

@ -1,19 +0,0 @@
# Avocet — environment variable configuration
# Copy to .env and fill in values. All keys are optional.
# label_tool.yaml takes precedence over env vars where both exist.
# ── Local inference (Ollama) ───────────────────────────────────────────────────
# OLLAMA_HOST defaults to http://localhost:11434 if unset.
OLLAMA_HOST=http://localhost:11434
OLLAMA_MODEL=llama3.2:3b
# ── cf-orch coordinator (paid/premium tiers) ───────────────────────────────────
# Required for multi-GPU LLM benchmarking via the cf-orch benchmark harness.
# Free-tier users can leave these unset and use Ollama only.
CF_ORCH_URL=http://localhost:7700
CF_LICENSE_KEY=CFG-AVCT-xxxx-xxxx-xxxx
# ── Cloud LLM backends (optional — paid/premium) ──────────────────────────────
# Set one of these to use a cloud LLM instead of a local model.
# ANTHROPIC_API_KEY=sk-ant-...
# OPENAI_API_KEY=sk-...

View file

@ -14,7 +14,6 @@ from __future__ import annotations
import json import json
import logging import logging
import os
import re import re
import subprocess as _subprocess import subprocess as _subprocess
from pathlib import Path from pathlib import Path
@ -50,32 +49,16 @@ def _config_file() -> Path:
def _load_cforch_config() -> dict: def _load_cforch_config() -> dict:
"""Read label_tool.yaml cforch section, falling back to environment variables. """Read label_tool.yaml and return the cforch sub-dict (or {} if absent/malformed)."""
Priority (highest to lowest):
1. label_tool.yaml cforch: key
2. Environment variables (CF_ORCH_URL, CF_LICENSE_KEY, OLLAMA_HOST, OLLAMA_MODEL)
"""
f = _config_file() f = _config_file()
file_cfg: dict = {} if not f.exists():
if f.exists(): return {}
try: try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {} raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
file_cfg = raw.get("cforch", {}) or {}
except yaml.YAMLError as exc: except yaml.YAMLError as exc:
logger.warning("Failed to parse cforch config %s: %s", f, exc) logger.warning("Failed to parse cforch config %s: %s", f, exc)
return {}
# Env var fallbacks — only used when the yaml key is absent or empty return raw.get("cforch", {}) or {}
def _coalesce(file_val: str, env_key: str) -> str:
return file_val if file_val else os.environ.get(env_key, "")
return {
**file_cfg,
"coordinator_url": _coalesce(file_cfg.get("coordinator_url", ""), "CF_ORCH_URL"),
"license_key": _coalesce(file_cfg.get("license_key", ""), "CF_LICENSE_KEY"),
"ollama_url": _coalesce(file_cfg.get("ollama_url", ""), "OLLAMA_HOST"),
"ollama_model": _coalesce(file_cfg.get("ollama_model", ""), "OLLAMA_MODEL"),
}
def _strip_ansi(text: str) -> str: def _strip_ansi(text: str) -> str:
@ -204,7 +187,6 @@ def run_benchmark(
python_bin = cfg.get("python_bin", "/devl/miniconda3/envs/cf/bin/python") python_bin = cfg.get("python_bin", "/devl/miniconda3/envs/cf/bin/python")
cfg_coordinator = cfg.get("coordinator_url", "") cfg_coordinator = cfg.get("coordinator_url", "")
cfg_ollama = cfg.get("ollama_url", "") cfg_ollama = cfg.get("ollama_url", "")
cfg_license_key = cfg.get("license_key", "")
def generate(): def generate():
global _BENCH_RUNNING, _bench_proc global _BENCH_RUNNING, _bench_proc
@ -226,7 +208,6 @@ def run_benchmark(
if model_tags: if model_tags:
cmd.extend(["--filter-tags"] + model_tags.split(",")) cmd.extend(["--filter-tags"] + model_tags.split(","))
# query param overrides config, config overrides env var (already resolved by _load_cforch_config)
effective_coordinator = coordinator_url if coordinator_url else cfg_coordinator effective_coordinator = coordinator_url if coordinator_url else cfg_coordinator
effective_ollama = ollama_url if ollama_url else cfg_ollama effective_ollama = ollama_url if ollama_url else cfg_ollama
if effective_coordinator: if effective_coordinator:
@ -234,11 +215,6 @@ def run_benchmark(
if effective_ollama: if effective_ollama:
cmd.extend(["--ollama-url", effective_ollama]) cmd.extend(["--ollama-url", effective_ollama])
# Pass license key as env var so subprocess can authenticate with cf-orch
proc_env = {**os.environ}
if cfg_license_key:
proc_env["CF_LICENSE_KEY"] = cfg_license_key
_BENCH_RUNNING = True _BENCH_RUNNING = True
try: try:
proc = _subprocess.Popen( proc = _subprocess.Popen(
@ -247,7 +223,6 @@ def run_benchmark(
stderr=_subprocess.STDOUT, stderr=_subprocess.STDOUT,
text=True, text=True,
bufsize=1, bufsize=1,
env=proc_env,
) )
_bench_proc = proc _bench_proc = proc
try: try:
@ -281,25 +256,6 @@ def run_benchmark(
) )
# ── GET /config ────────────────────────────────────────────────────────────────
@router.get("/config")
def get_cforch_config() -> dict:
"""Return resolved cf-orch connection config (env vars merged with yaml).
Redacts license_key — only returns whether it is set, not the value.
Used by the Settings UI to show current connection state.
"""
cfg = _load_cforch_config()
return {
"coordinator_url": cfg.get("coordinator_url", ""),
"ollama_url": cfg.get("ollama_url", ""),
"ollama_model": cfg.get("ollama_model", ""),
"license_key_set": bool(cfg.get("license_key", "")),
"source": "env" if not _config_file().exists() else "yaml+env",
}
# ── GET /results ─────────────────────────────────────────────────────────────── # ── GET /results ───────────────────────────────────────────────────────────────
@router.get("/results") @router.get("/results")

View file

@ -51,26 +51,17 @@ def _config_file() -> Path:
return _ROOT / "config" / "label_tool.yaml" return _ROOT / "config" / "label_tool.yaml"
_DEFAULT_BENCH_RESULTS_DIR = "/Library/Development/CircuitForge/circuitforge-orch/scripts/bench_results"
def set_default_bench_results_dir(path: str) -> None:
"""Override the default bench_results_dir — used by tests to avoid real filesystem."""
global _DEFAULT_BENCH_RESULTS_DIR
_DEFAULT_BENCH_RESULTS_DIR = path
def _get_bench_results_dir() -> Path: def _get_bench_results_dir() -> Path:
f = _config_file() f = _config_file()
if f.exists(): if not f.exists():
return Path("/nonexistent-bench-results")
try: try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {} raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
d = raw.get("sft", {}).get("bench_results_dir", "")
if d:
return Path(d)
except yaml.YAMLError as exc: except yaml.YAMLError as exc:
logger.warning("Failed to parse SFT config %s: %s", f, exc) logger.warning("Failed to parse SFT config %s: %s", f, exc)
return Path(_DEFAULT_BENCH_RESULTS_DIR) return Path("/nonexistent-bench-results")
d = raw.get("sft", {}).get("bench_results_dir", "")
return Path(d) if d else Path("/nonexistent-bench-results")
def _candidates_file() -> Path: def _candidates_file() -> Path:

View file

@ -27,26 +27,6 @@ max_per_account: 500
sft: sft:
bench_results_dir: /path/to/circuitforge-orch/scripts/bench_results bench_results_dir: /path/to/circuitforge-orch/scripts/bench_results
# cf-orch integration — LLM benchmark harness via cf-orch coordinator.
# All keys here override the corresponding environment variables.
# Omit any key to fall back to the env var (see .env.example).
cforch:
# Path to cf-orch's benchmark.py script
bench_script: /path/to/circuitforge-orch/scripts/benchmark.py
# Task and model definition files (yaml)
bench_tasks: /path/to/circuitforge-orch/scripts/bench_tasks.yaml
bench_models: /path/to/circuitforge-orch/scripts/bench_models.yaml
# Where benchmark results are written (also used for SFT candidate discovery)
results_dir: /path/to/circuitforge-orch/scripts/bench_results
# Python interpreter with cf-orch installed
python_bin: /devl/miniconda3/envs/cf/bin/python
# Connection config — override env vars CF_ORCH_URL / CF_LICENSE_KEY / OLLAMA_HOST
# coordinator_url: http://localhost:7700
# license_key: CFG-AVCT-xxxx-xxxx-xxxx
# ollama_url: http://localhost:11434
# ollama_model: llama3.2:3b
# Imitate tab — pull real samples from sibling CF product APIs and run them # Imitate tab — pull real samples from sibling CF product APIs and run them
# through local LLMs to build a corrections dataset. # through local LLMs to build a corrections dataset.
# ollama_url defaults to cforch.ollama_url if omitted here. # ollama_url defaults to cforch.ollama_url if omitted here.

View file

@ -22,8 +22,5 @@ dependencies:
# Optional: BGE reranker adapter # Optional: BGE reranker adapter
# - FlagEmbedding # - FlagEmbedding
# CircuitForge shared core (LLM router, tier system, config)
- circuitforge-core>=0.9.0
# Dev # Dev
- pytest>=8.0 - pytest>=8.0

View file

@ -283,87 +283,3 @@ def test_cancel_terminates_running_benchmark(client):
mock_proc.terminate.assert_called_once() mock_proc.terminate.assert_called_once()
assert cforch_module._BENCH_RUNNING is False assert cforch_module._BENCH_RUNNING is False
assert cforch_module._bench_proc is None assert cforch_module._bench_proc is None
# ── GET /config ────────────────────────────────────────────────────────────────
def test_config_returns_empty_when_no_yaml_no_env(client, monkeypatch):
"""No yaml, no env vars — all fields empty, license_key_set False."""
for key in ("CF_ORCH_URL", "CF_LICENSE_KEY", "OLLAMA_HOST", "OLLAMA_MODEL"):
monkeypatch.delenv(key, raising=False)
r = client.get("/api/cforch/config")
assert r.status_code == 200
data = r.json()
assert data["coordinator_url"] == ""
assert data["ollama_url"] == ""
assert data["license_key_set"] is False
def test_config_reads_env_vars_when_no_yaml(client, monkeypatch):
"""Env vars populate fields when label_tool.yaml has no cforch section."""
monkeypatch.setenv("CF_ORCH_URL", "http://orch.example.com:7700")
monkeypatch.setenv("CF_LICENSE_KEY", "CFG-AVCT-TEST-TEST-TEST")
monkeypatch.setenv("OLLAMA_HOST", "http://ollama.local:11434")
monkeypatch.setenv("OLLAMA_MODEL", "mistral:7b")
r = client.get("/api/cforch/config")
assert r.status_code == 200
data = r.json()
assert data["coordinator_url"] == "http://orch.example.com:7700"
assert data["ollama_url"] == "http://ollama.local:11434"
assert data["ollama_model"] == "mistral:7b"
assert data["license_key_set"] is True # set, but value not exposed
def test_config_yaml_overrides_env(client, config_dir, monkeypatch):
"""label_tool.yaml cforch values take priority over env vars."""
monkeypatch.setenv("CF_ORCH_URL", "http://env-orch:7700")
monkeypatch.setenv("OLLAMA_HOST", "http://env-ollama:11434")
_write_config(config_dir, {
"coordinator_url": "http://yaml-orch:7700",
"ollama_url": "http://yaml-ollama:11434",
})
r = client.get("/api/cforch/config")
assert r.status_code == 200
data = r.json()
assert data["coordinator_url"] == "http://yaml-orch:7700"
assert data["ollama_url"] == "http://yaml-ollama:11434"
assert data["source"] == "yaml+env"
def test_run_passes_license_key_env_to_subprocess(client, config_dir, tmp_path, monkeypatch):
"""CF_LICENSE_KEY must be forwarded to the benchmark subprocess env."""
monkeypatch.setenv("CF_LICENSE_KEY", "CFG-AVCT-ENV-ONLY-KEY")
bench_script = tmp_path / "benchmark.py"
bench_script.write_text("# stub", encoding="utf-8")
tasks_file = tmp_path / "bench_tasks.yaml"
tasks_file.write_text(yaml.dump({"tasks": []}), encoding="utf-8")
models_file = tmp_path / "bench_models.yaml"
models_file.write_text(yaml.dump({"models": []}), encoding="utf-8")
_write_config(config_dir, {
"bench_script": str(bench_script),
"bench_tasks": str(tasks_file),
"bench_models": str(models_file),
"results_dir": str(tmp_path / "results"),
"python_bin": "/usr/bin/python3",
})
captured_env: dict = {}
def fake_popen(cmd, **kwargs):
captured_env.update(kwargs.get("env", {}))
mock = MagicMock()
mock.stdout = iter([])
mock.returncode = 0
mock.wait = MagicMock()
return mock
with patch("app.cforch._subprocess.Popen", side_effect=fake_popen):
client.get("/api/cforch/run")
assert captured_env.get("CF_LICENSE_KEY") == "CFG-AVCT-ENV-ONLY-KEY"

View file

@ -10,14 +10,11 @@ def reset_sft_globals(tmp_path):
from app import sft as sft_module from app import sft as sft_module
_prev_data = sft_module._SFT_DATA_DIR _prev_data = sft_module._SFT_DATA_DIR
_prev_cfg = sft_module._SFT_CONFIG_DIR _prev_cfg = sft_module._SFT_CONFIG_DIR
_prev_default = sft_module._DEFAULT_BENCH_RESULTS_DIR
sft_module.set_sft_data_dir(tmp_path) sft_module.set_sft_data_dir(tmp_path)
sft_module.set_sft_config_dir(tmp_path) sft_module.set_sft_config_dir(tmp_path)
sft_module.set_default_bench_results_dir(str(tmp_path / "bench_results"))
yield yield
sft_module.set_sft_data_dir(_prev_data) sft_module.set_sft_data_dir(_prev_data)
sft_module.set_sft_config_dir(_prev_cfg) sft_module.set_sft_config_dir(_prev_cfg)
sft_module.set_default_bench_results_dir(_prev_default)
@pytest.fixture @pytest.fixture

View file

@ -115,18 +115,8 @@
<h2 class="section-title">cf-orch Integration</h2> <h2 class="section-title">cf-orch Integration</h2>
<p class="section-desc"> <p class="section-desc">
Import SFT (supervised fine-tuning) candidates from cf-orch benchmark runs. Import SFT (supervised fine-tuning) candidates from cf-orch benchmark runs.
Connection settings fall back to environment variables
(<code>CF_ORCH_URL</code>, <code>CF_LICENSE_KEY</code>, <code>OLLAMA_HOST</code>)
when not set here.
</p> </p>
<!-- Connection status pill -->
<div v-if="orchConfig" class="orch-status-row">
<span class="orch-status-pill" :class="orchStatusClass">{{ orchStatusLabel }}</span>
<span v-if="orchConfig.source === 'env'" class="orch-source-note">via env vars</span>
<span v-else class="orch-source-note">via label_tool.yaml</span>
</div>
<div class="field-row"> <div class="field-row">
<label class="field field-grow"> <label class="field field-grow">
<span>bench_results_dir</span> <span>bench_results_dir</span>
@ -191,7 +181,7 @@
</template> </template>
<script setup lang="ts"> <script setup lang="ts">
import { ref, computed, onMounted } from 'vue' import { ref, onMounted } from 'vue'
import { useApiFetch } from '../composables/useApi' import { useApiFetch } from '../composables/useApi'
interface Account { interface Account {
@ -209,27 +199,12 @@ const saveOk = ref(true)
const richMotion = ref(localStorage.getItem('cf-avocet-rich-motion') !== 'false') const richMotion = ref(localStorage.getItem('cf-avocet-rich-motion') !== 'false')
const keyHints = ref(localStorage.getItem('cf-avocet-key-hints') !== 'false') const keyHints = ref(localStorage.getItem('cf-avocet-key-hints') !== 'false')
// SFT / cf-orch integration state // SFT integration state
const benchResultsDir = ref('') const benchResultsDir = ref('')
const runs = ref<Array<{ run_id: string; timestamp: string; candidate_count: number; already_imported: boolean }>>([]) const runs = ref<Array<{ run_id: string; timestamp: string; candidate_count: number; already_imported: boolean }>>([])
const importingRunId = ref<string | null>(null) const importingRunId = ref<string | null>(null)
const importResult = ref<{ imported: number; skipped: number } | null>(null) const importResult = ref<{ imported: number; skipped: number } | null>(null)
const saveStatus = ref('') const saveStatus = ref('')
const orchConfig = ref<{ coordinator_url: string; ollama_url: string; ollama_model: string; license_key_set: boolean; source: string } | null>(null)
const orchStatusClass = computed(() => {
if (!orchConfig.value) return 'status-unknown'
if (orchConfig.value.coordinator_url) return 'status-connected'
if (orchConfig.value.ollama_url) return 'status-local'
return 'status-unconfigured'
})
const orchStatusLabel = computed(() => {
if (!orchConfig.value) return 'Unknown'
if (orchConfig.value.coordinator_url) return '● cf-orch coordinator'
if (orchConfig.value.ollama_url) return '● Ollama (local)'
return '○ Not configured'
})
async function loadSftConfig() { async function loadSftConfig() {
try { try {
@ -243,15 +218,6 @@ async function loadSftConfig() {
} }
} }
async function loadOrchConfig() {
try {
const res = await fetch('/api/cforch/config')
if (res.ok) orchConfig.value = await res.json()
} catch {
// non-fatal
}
}
async function saveSftConfig() { async function saveSftConfig() {
saveStatus.value = 'Saving…' saveStatus.value = 'Saving…'
try { try {
@ -371,7 +337,6 @@ function onKeyHintsChange() {
onMounted(() => { onMounted(() => {
reload() reload()
loadSftConfig() loadSftConfig()
loadOrchConfig()
}) })
</script> </script>
@ -599,31 +564,6 @@ onMounted(() => {
width: 100%; width: 100%;
} }
.orch-status-row {
display: flex;
align-items: center;
gap: var(--space-2);
margin-bottom: var(--space-3);
}
.orch-status-pill {
font-size: 0.8rem;
font-weight: 600;
padding: var(--space-1) var(--space-3);
border-radius: var(--radius-full);
}
.status-connected { background: color-mix(in srgb, var(--color-success, #3a7a32) 12%, transparent); color: var(--color-success, #3a7a32); }
.status-local { background: color-mix(in srgb, var(--color-primary) 12%, transparent); color: var(--color-primary); }
.status-unconfigured { background: var(--color-surface-alt); color: var(--color-text-muted); }
.status-unknown { background: var(--color-surface-alt); color: var(--color-text-muted); }
.orch-source-note {
font-size: 0.75rem;
color: var(--color-text-muted);
font-style: italic;
}
.runs-table { .runs-table {
width: 100%; width: 100%;
border-collapse: collapse; border-collapse: collapse;