Compare commits

..

2 commits

Author SHA1 Message Date
118ae2660a feat: Imitate tab — pull CF product samples, compare LLM responses
Backend (app/imitate.py):
- GET /api/imitate/products — reads imitate: config, checks online status
- GET /api/imitate/products/{id}/sample — fetches real item from product API
- GET /api/imitate/run (SSE) — streams ollama responses for selected models
- POST /api/imitate/push-corrections — queues results in SFT corrections JSONL

Frontend (ImitateView.vue):
- Step 1: product picker grid (online/offline status, icon from config)
- Step 2: raw sample preview + editable prompt textarea
- Step 3: ollama model multi-select, temperature slider, SSE run with live log
- Step 4: response cards side by side, push to Corrections button

Wiring:
- app/api.py: include imitate_router at /api/imitate
- web/src/router: /imitate route + lazy import
- AppSidebar: Imitate nav entry (mirror icon)
- config/label_tool.yaml.example: imitate: section with peregrine example
- 16 unit tests (100% passing)

Also: BenchmarkView.vue Compare panel — side-by-side run diff for bench results
2026-04-09 20:04:45 -07:00
dc2dc70ef9 test: fix test_tasks_parses_yaml for TaskEntry schema
TaskEntry now includes prompt/system fields (default ""). Switch from
exact dict comparison to field-by-field assertions so the test is
forward-compatible with optional schema additions.
2026-04-09 20:02:12 -07:00
8 changed files with 24 additions and 266 deletions

View file

@ -1,19 +0,0 @@
# Avocet — environment variable configuration
# Copy to .env and fill in values. All keys are optional.
# label_tool.yaml takes precedence over env vars where both exist.
# ── Local inference (Ollama) ───────────────────────────────────────────────────
# OLLAMA_HOST defaults to http://localhost:11434 if unset.
OLLAMA_HOST=http://localhost:11434
OLLAMA_MODEL=llama3.2:3b
# ── cf-orch coordinator (paid/premium tiers) ───────────────────────────────────
# Required for multi-GPU LLM benchmarking via the cf-orch benchmark harness.
# Free-tier users can leave these unset and use Ollama only.
CF_ORCH_URL=http://localhost:7700
CF_LICENSE_KEY=CFG-AVCT-xxxx-xxxx-xxxx
# ── Cloud LLM backends (optional — paid/premium) ──────────────────────────────
# Set one of these to use a cloud LLM instead of a local model.
# ANTHROPIC_API_KEY=sk-ant-...
# OPENAI_API_KEY=sk-...

View file

@ -14,7 +14,6 @@ from __future__ import annotations
import json import json
import logging import logging
import os
import re import re
import subprocess as _subprocess import subprocess as _subprocess
from pathlib import Path from pathlib import Path
@ -50,32 +49,16 @@ def _config_file() -> Path:
def _load_cforch_config() -> dict: def _load_cforch_config() -> dict:
"""Read label_tool.yaml cforch section, falling back to environment variables. """Read label_tool.yaml and return the cforch sub-dict (or {} if absent/malformed)."""
Priority (highest to lowest):
1. label_tool.yaml cforch: key
2. Environment variables (CF_ORCH_URL, CF_LICENSE_KEY, OLLAMA_HOST, OLLAMA_MODEL)
"""
f = _config_file() f = _config_file()
file_cfg: dict = {} if not f.exists():
if f.exists(): return {}
try: try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {} raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
file_cfg = raw.get("cforch", {}) or {}
except yaml.YAMLError as exc: except yaml.YAMLError as exc:
logger.warning("Failed to parse cforch config %s: %s", f, exc) logger.warning("Failed to parse cforch config %s: %s", f, exc)
return {}
# Env var fallbacks — only used when the yaml key is absent or empty return raw.get("cforch", {}) or {}
def _coalesce(file_val: str, env_key: str) -> str:
return file_val if file_val else os.environ.get(env_key, "")
return {
**file_cfg,
"coordinator_url": _coalesce(file_cfg.get("coordinator_url", ""), "CF_ORCH_URL"),
"license_key": _coalesce(file_cfg.get("license_key", ""), "CF_LICENSE_KEY"),
"ollama_url": _coalesce(file_cfg.get("ollama_url", ""), "OLLAMA_HOST"),
"ollama_model": _coalesce(file_cfg.get("ollama_model", ""), "OLLAMA_MODEL"),
}
def _strip_ansi(text: str) -> str: def _strip_ansi(text: str) -> str:
@ -204,7 +187,6 @@ def run_benchmark(
python_bin = cfg.get("python_bin", "/devl/miniconda3/envs/cf/bin/python") python_bin = cfg.get("python_bin", "/devl/miniconda3/envs/cf/bin/python")
cfg_coordinator = cfg.get("coordinator_url", "") cfg_coordinator = cfg.get("coordinator_url", "")
cfg_ollama = cfg.get("ollama_url", "") cfg_ollama = cfg.get("ollama_url", "")
cfg_license_key = cfg.get("license_key", "")
def generate(): def generate():
global _BENCH_RUNNING, _bench_proc global _BENCH_RUNNING, _bench_proc
@ -226,7 +208,6 @@ def run_benchmark(
if model_tags: if model_tags:
cmd.extend(["--filter-tags"] + model_tags.split(",")) cmd.extend(["--filter-tags"] + model_tags.split(","))
# query param overrides config, config overrides env var (already resolved by _load_cforch_config)
effective_coordinator = coordinator_url if coordinator_url else cfg_coordinator effective_coordinator = coordinator_url if coordinator_url else cfg_coordinator
effective_ollama = ollama_url if ollama_url else cfg_ollama effective_ollama = ollama_url if ollama_url else cfg_ollama
if effective_coordinator: if effective_coordinator:
@ -234,11 +215,6 @@ def run_benchmark(
if effective_ollama: if effective_ollama:
cmd.extend(["--ollama-url", effective_ollama]) cmd.extend(["--ollama-url", effective_ollama])
# Pass license key as env var so subprocess can authenticate with cf-orch
proc_env = {**os.environ}
if cfg_license_key:
proc_env["CF_LICENSE_KEY"] = cfg_license_key
_BENCH_RUNNING = True _BENCH_RUNNING = True
try: try:
proc = _subprocess.Popen( proc = _subprocess.Popen(
@ -247,7 +223,6 @@ def run_benchmark(
stderr=_subprocess.STDOUT, stderr=_subprocess.STDOUT,
text=True, text=True,
bufsize=1, bufsize=1,
env=proc_env,
) )
_bench_proc = proc _bench_proc = proc
try: try:
@ -281,25 +256,6 @@ def run_benchmark(
) )
# ── GET /config ────────────────────────────────────────────────────────────────
@router.get("/config")
def get_cforch_config() -> dict:
"""Return resolved cf-orch connection config (env vars merged with yaml).
Redacts license_key — only returns whether it is set, not the value.
Used by the Settings UI to show current connection state.
"""
cfg = _load_cforch_config()
return {
"coordinator_url": cfg.get("coordinator_url", ""),
"ollama_url": cfg.get("ollama_url", ""),
"ollama_model": cfg.get("ollama_model", ""),
"license_key_set": bool(cfg.get("license_key", "")),
"source": "env" if not _config_file().exists() else "yaml+env",
}
# ── GET /results ─────────────────────────────────────────────────────────────── # ── GET /results ───────────────────────────────────────────────────────────────
@router.get("/results") @router.get("/results")

View file

@ -51,26 +51,17 @@ def _config_file() -> Path:
return _ROOT / "config" / "label_tool.yaml" return _ROOT / "config" / "label_tool.yaml"
_DEFAULT_BENCH_RESULTS_DIR = "/Library/Development/CircuitForge/circuitforge-orch/scripts/bench_results"
def set_default_bench_results_dir(path: str) -> None:
"""Override the default bench_results_dir — used by tests to avoid real filesystem."""
global _DEFAULT_BENCH_RESULTS_DIR
_DEFAULT_BENCH_RESULTS_DIR = path
def _get_bench_results_dir() -> Path: def _get_bench_results_dir() -> Path:
f = _config_file() f = _config_file()
if f.exists(): if not f.exists():
return Path("/nonexistent-bench-results")
try: try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {} raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
d = raw.get("sft", {}).get("bench_results_dir", "")
if d:
return Path(d)
except yaml.YAMLError as exc: except yaml.YAMLError as exc:
logger.warning("Failed to parse SFT config %s: %s", f, exc) logger.warning("Failed to parse SFT config %s: %s", f, exc)
return Path(_DEFAULT_BENCH_RESULTS_DIR) return Path("/nonexistent-bench-results")
d = raw.get("sft", {}).get("bench_results_dir", "")
return Path(d) if d else Path("/nonexistent-bench-results")
def _candidates_file() -> Path: def _candidates_file() -> Path:

View file

@ -27,26 +27,6 @@ max_per_account: 500
sft: sft:
bench_results_dir: /path/to/circuitforge-orch/scripts/bench_results bench_results_dir: /path/to/circuitforge-orch/scripts/bench_results
# cf-orch integration — LLM benchmark harness via cf-orch coordinator.
# All keys here override the corresponding environment variables.
# Omit any key to fall back to the env var (see .env.example).
cforch:
# Path to cf-orch's benchmark.py script
bench_script: /path/to/circuitforge-orch/scripts/benchmark.py
# Task and model definition files (yaml)
bench_tasks: /path/to/circuitforge-orch/scripts/bench_tasks.yaml
bench_models: /path/to/circuitforge-orch/scripts/bench_models.yaml
# Where benchmark results are written (also used for SFT candidate discovery)
results_dir: /path/to/circuitforge-orch/scripts/bench_results
# Python interpreter with cf-orch installed
python_bin: /devl/miniconda3/envs/cf/bin/python
# Connection config — override env vars CF_ORCH_URL / CF_LICENSE_KEY / OLLAMA_HOST
# coordinator_url: http://localhost:7700
# license_key: CFG-AVCT-xxxx-xxxx-xxxx
# ollama_url: http://localhost:11434
# ollama_model: llama3.2:3b
# Imitate tab — pull real samples from sibling CF product APIs and run them # Imitate tab — pull real samples from sibling CF product APIs and run them
# through local LLMs to build a corrections dataset. # through local LLMs to build a corrections dataset.
# ollama_url defaults to cforch.ollama_url if omitted here. # ollama_url defaults to cforch.ollama_url if omitted here.

View file

@ -22,8 +22,5 @@ dependencies:
# Optional: BGE reranker adapter # Optional: BGE reranker adapter
# - FlagEmbedding # - FlagEmbedding
# CircuitForge shared core (LLM router, tier system, config)
- circuitforge-core>=0.9.0
# Dev # Dev
- pytest>=8.0 - pytest>=8.0

View file

@ -283,87 +283,3 @@ def test_cancel_terminates_running_benchmark(client):
mock_proc.terminate.assert_called_once() mock_proc.terminate.assert_called_once()
assert cforch_module._BENCH_RUNNING is False assert cforch_module._BENCH_RUNNING is False
assert cforch_module._bench_proc is None assert cforch_module._bench_proc is None
# ── GET /config ────────────────────────────────────────────────────────────────
def test_config_returns_empty_when_no_yaml_no_env(client, monkeypatch):
"""No yaml, no env vars — all fields empty, license_key_set False."""
for key in ("CF_ORCH_URL", "CF_LICENSE_KEY", "OLLAMA_HOST", "OLLAMA_MODEL"):
monkeypatch.delenv(key, raising=False)
r = client.get("/api/cforch/config")
assert r.status_code == 200
data = r.json()
assert data["coordinator_url"] == ""
assert data["ollama_url"] == ""
assert data["license_key_set"] is False
def test_config_reads_env_vars_when_no_yaml(client, monkeypatch):
"""Env vars populate fields when label_tool.yaml has no cforch section."""
monkeypatch.setenv("CF_ORCH_URL", "http://orch.example.com:7700")
monkeypatch.setenv("CF_LICENSE_KEY", "CFG-AVCT-TEST-TEST-TEST")
monkeypatch.setenv("OLLAMA_HOST", "http://ollama.local:11434")
monkeypatch.setenv("OLLAMA_MODEL", "mistral:7b")
r = client.get("/api/cforch/config")
assert r.status_code == 200
data = r.json()
assert data["coordinator_url"] == "http://orch.example.com:7700"
assert data["ollama_url"] == "http://ollama.local:11434"
assert data["ollama_model"] == "mistral:7b"
assert data["license_key_set"] is True # set, but value not exposed
def test_config_yaml_overrides_env(client, config_dir, monkeypatch):
"""label_tool.yaml cforch values take priority over env vars."""
monkeypatch.setenv("CF_ORCH_URL", "http://env-orch:7700")
monkeypatch.setenv("OLLAMA_HOST", "http://env-ollama:11434")
_write_config(config_dir, {
"coordinator_url": "http://yaml-orch:7700",
"ollama_url": "http://yaml-ollama:11434",
})
r = client.get("/api/cforch/config")
assert r.status_code == 200
data = r.json()
assert data["coordinator_url"] == "http://yaml-orch:7700"
assert data["ollama_url"] == "http://yaml-ollama:11434"
assert data["source"] == "yaml+env"
def test_run_passes_license_key_env_to_subprocess(client, config_dir, tmp_path, monkeypatch):
"""CF_LICENSE_KEY must be forwarded to the benchmark subprocess env."""
monkeypatch.setenv("CF_LICENSE_KEY", "CFG-AVCT-ENV-ONLY-KEY")
bench_script = tmp_path / "benchmark.py"
bench_script.write_text("# stub", encoding="utf-8")
tasks_file = tmp_path / "bench_tasks.yaml"
tasks_file.write_text(yaml.dump({"tasks": []}), encoding="utf-8")
models_file = tmp_path / "bench_models.yaml"
models_file.write_text(yaml.dump({"models": []}), encoding="utf-8")
_write_config(config_dir, {
"bench_script": str(bench_script),
"bench_tasks": str(tasks_file),
"bench_models": str(models_file),
"results_dir": str(tmp_path / "results"),
"python_bin": "/usr/bin/python3",
})
captured_env: dict = {}
def fake_popen(cmd, **kwargs):
captured_env.update(kwargs.get("env", {}))
mock = MagicMock()
mock.stdout = iter([])
mock.returncode = 0
mock.wait = MagicMock()
return mock
with patch("app.cforch._subprocess.Popen", side_effect=fake_popen):
client.get("/api/cforch/run")
assert captured_env.get("CF_LICENSE_KEY") == "CFG-AVCT-ENV-ONLY-KEY"

View file

@ -10,14 +10,11 @@ def reset_sft_globals(tmp_path):
from app import sft as sft_module from app import sft as sft_module
_prev_data = sft_module._SFT_DATA_DIR _prev_data = sft_module._SFT_DATA_DIR
_prev_cfg = sft_module._SFT_CONFIG_DIR _prev_cfg = sft_module._SFT_CONFIG_DIR
_prev_default = sft_module._DEFAULT_BENCH_RESULTS_DIR
sft_module.set_sft_data_dir(tmp_path) sft_module.set_sft_data_dir(tmp_path)
sft_module.set_sft_config_dir(tmp_path) sft_module.set_sft_config_dir(tmp_path)
sft_module.set_default_bench_results_dir(str(tmp_path / "bench_results"))
yield yield
sft_module.set_sft_data_dir(_prev_data) sft_module.set_sft_data_dir(_prev_data)
sft_module.set_sft_config_dir(_prev_cfg) sft_module.set_sft_config_dir(_prev_cfg)
sft_module.set_default_bench_results_dir(_prev_default)
@pytest.fixture @pytest.fixture

View file

@ -115,18 +115,8 @@
<h2 class="section-title">cf-orch Integration</h2> <h2 class="section-title">cf-orch Integration</h2>
<p class="section-desc"> <p class="section-desc">
Import SFT (supervised fine-tuning) candidates from cf-orch benchmark runs. Import SFT (supervised fine-tuning) candidates from cf-orch benchmark runs.
Connection settings fall back to environment variables
(<code>CF_ORCH_URL</code>, <code>CF_LICENSE_KEY</code>, <code>OLLAMA_HOST</code>)
when not set here.
</p> </p>
<!-- Connection status pill -->
<div v-if="orchConfig" class="orch-status-row">
<span class="orch-status-pill" :class="orchStatusClass">{{ orchStatusLabel }}</span>
<span v-if="orchConfig.source === 'env'" class="orch-source-note">via env vars</span>
<span v-else class="orch-source-note">via label_tool.yaml</span>
</div>
<div class="field-row"> <div class="field-row">
<label class="field field-grow"> <label class="field field-grow">
<span>bench_results_dir</span> <span>bench_results_dir</span>
@ -191,7 +181,7 @@
</template> </template>
<script setup lang="ts"> <script setup lang="ts">
import { ref, computed, onMounted } from 'vue' import { ref, onMounted } from 'vue'
import { useApiFetch } from '../composables/useApi' import { useApiFetch } from '../composables/useApi'
interface Account { interface Account {
@ -209,27 +199,12 @@ const saveOk = ref(true)
const richMotion = ref(localStorage.getItem('cf-avocet-rich-motion') !== 'false') const richMotion = ref(localStorage.getItem('cf-avocet-rich-motion') !== 'false')
const keyHints = ref(localStorage.getItem('cf-avocet-key-hints') !== 'false') const keyHints = ref(localStorage.getItem('cf-avocet-key-hints') !== 'false')
// SFT / cf-orch integration state // SFT integration state
const benchResultsDir = ref('') const benchResultsDir = ref('')
const runs = ref<Array<{ run_id: string; timestamp: string; candidate_count: number; already_imported: boolean }>>([]) const runs = ref<Array<{ run_id: string; timestamp: string; candidate_count: number; already_imported: boolean }>>([])
const importingRunId = ref<string | null>(null) const importingRunId = ref<string | null>(null)
const importResult = ref<{ imported: number; skipped: number } | null>(null) const importResult = ref<{ imported: number; skipped: number } | null>(null)
const saveStatus = ref('') const saveStatus = ref('')
const orchConfig = ref<{ coordinator_url: string; ollama_url: string; ollama_model: string; license_key_set: boolean; source: string } | null>(null)
const orchStatusClass = computed(() => {
if (!orchConfig.value) return 'status-unknown'
if (orchConfig.value.coordinator_url) return 'status-connected'
if (orchConfig.value.ollama_url) return 'status-local'
return 'status-unconfigured'
})
const orchStatusLabel = computed(() => {
if (!orchConfig.value) return 'Unknown'
if (orchConfig.value.coordinator_url) return '● cf-orch coordinator'
if (orchConfig.value.ollama_url) return '● Ollama (local)'
return '○ Not configured'
})
async function loadSftConfig() { async function loadSftConfig() {
try { try {
@ -243,15 +218,6 @@ async function loadSftConfig() {
} }
} }
async function loadOrchConfig() {
try {
const res = await fetch('/api/cforch/config')
if (res.ok) orchConfig.value = await res.json()
} catch {
// non-fatal
}
}
async function saveSftConfig() { async function saveSftConfig() {
saveStatus.value = 'Saving…' saveStatus.value = 'Saving…'
try { try {
@ -371,7 +337,6 @@ function onKeyHintsChange() {
onMounted(() => { onMounted(() => {
reload() reload()
loadSftConfig() loadSftConfig()
loadOrchConfig()
}) })
</script> </script>
@ -599,31 +564,6 @@ onMounted(() => {
width: 100%; width: 100%;
} }
.orch-status-row {
display: flex;
align-items: center;
gap: var(--space-2);
margin-bottom: var(--space-3);
}
.orch-status-pill {
font-size: 0.8rem;
font-weight: 600;
padding: var(--space-1) var(--space-3);
border-radius: var(--radius-full);
}
.status-connected { background: color-mix(in srgb, var(--color-success, #3a7a32) 12%, transparent); color: var(--color-success, #3a7a32); }
.status-local { background: color-mix(in srgb, var(--color-primary) 12%, transparent); color: var(--color-primary); }
.status-unconfigured { background: var(--color-surface-alt); color: var(--color-text-muted); }
.status-unknown { background: var(--color-surface-alt); color: var(--color-text-muted); }
.orch-source-note {
font-size: 0.75rem;
color: var(--color-text-muted);
font-style: italic;
}
.runs-table { .runs-table {
width: 100%; width: 100%;
border-collapse: collapse; border-collapse: collapse;