feat(imitate): parallel cf-text fanout workers + signal-based cold-start detection

Backend:
- Run all cf-text model allocations concurrently via ThreadPoolExecutor + as_completed
- Announce model_start events upfront so the UI can show loading states immediately
- Replace timer-based startup polling with coordinator state signals: waits for
  state=="running" (success) or state=="stopped" (fail-fast) on the matching
  node/gpu instance; falls back to health poll after 6 consecutive probe misses
- Add /api/cforch/catalog endpoint: fetches live cf-text model list from cf-orch,
  filtering out proxy entries (ollama://, vllm://, http://) so only loadable models
  are returned

Frontend (ImitateView.vue):
- Show per-model loading spinners as results arrive via SSE stream
- Display cold-start badge when coordinator signals the model was freshly loaded
This commit is contained in:
pyr0ball 2026-04-24 14:56:09 -07:00
parent e6b64d6efe
commit cc24cd0d7d
3 changed files with 462 additions and 30 deletions

View file

@ -155,6 +155,9 @@ app.include_router(cforch_router, prefix="/api/cforch")
from app.imitate import router as imitate_router from app.imitate import router as imitate_router
app.include_router(imitate_router, prefix="/api/imitate") app.include_router(imitate_router, prefix="/api/imitate")
from app.style import router as style_router
app.include_router(style_router, prefix="/api/style")
# In-memory last-action store (single user, local tool — in-memory is fine) # In-memory last-action store (single user, local tool — in-memory is fine)
_last_action: dict | None = None _last_action: dict | None = None

View file

@ -11,6 +11,7 @@ override _CONFIG_DIR and _DATA_DIR via set_config_dir() / set_data_dir() in test
""" """
from __future__ import annotations from __future__ import annotations
import base64
import json import json
import logging import logging
import time import time
@ -21,6 +22,7 @@ from typing import Any
from urllib.error import URLError from urllib.error import URLError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
import httpx
import yaml import yaml
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
@ -87,6 +89,45 @@ def _ollama_url(cfg: dict) -> str:
return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434" return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434"
def _cforch_url() -> str:
cforch = _load_cforch_config()
return cforch.get("coordinator_url") or "http://localhost:7700"
def _cforch_catalog(cforch_base: str) -> list[dict]:
"""Fetch the live cf-text catalog from cf-orch.
Filters out proxy entries (ollama://, vllm://, http://) those models are
served by their own services and should not be allocated via cf-text.
Returns only models with real file-system paths that cf-text can load directly.
"""
try:
resp = httpx.get(
f"{cforch_base}/api/services/cf-text/catalog",
params={"node_id": "heimdall"},
timeout=5.0,
)
resp.raise_for_status()
raw = resp.json()
result = []
for model_id, entry in raw.items():
if not isinstance(entry, dict):
continue
path = entry.get("path", "")
# Skip proxy entries — they're routed through other services
if "://" in path:
continue
result.append({
"id": model_id,
"vram_mb": entry.get("vram_mb", 0),
"description": entry.get("description", ""),
})
return result
except Exception as exc:
logger.warning("Could not fetch cf-orch catalog: %s", exc)
return []
def _http_get_json(url: str, timeout: int = 5) -> Any: def _http_get_json(url: str, timeout: int = 5) -> Any:
"""Fetch JSON from url; raise URLError on failure.""" """Fetch JSON from url; raise URLError on failure."""
req = Request(url, headers={"Accept": "application/json"}) req = Request(url, headers={"Accept": "application/json"})
@ -104,18 +145,29 @@ def _is_online(base_url: str, health_path: str = "/api/health") -> bool:
def _extract_sample( def _extract_sample(
raw: Any, text_fields: list[str], sample_index: int = 0 raw: Any,
text_fields: list[str],
sample_index: int = 0,
sample_key: str | None = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Pull one item from a list or dict response and extract text_fields.""" """Pull one item from a list or dict response and extract text_fields.
sample_key: if provided, unwrap raw[sample_key] before looking for a list.
Falls back to a set of conventional envelope keys if sample_key is absent.
"""
item: dict[str, Any] item: dict[str, Any]
if isinstance(raw, list): if isinstance(raw, list):
if not raw: if not raw:
return {} return {}
item = raw[min(sample_index, len(raw) - 1)] item = raw[min(sample_index, len(raw) - 1)]
elif isinstance(raw, dict): elif isinstance(raw, dict):
# may be {items: [...]} or the item itself # Use declared sample_key first, then fall back to conventional names.
for key in ("items", "results", "data", "jobs", "listings", "pantry", _ENVELOPE_KEYS = (
"saved_searches", "entries", "calls", "records"): "samples", "items", "results", "data", "jobs", "listings",
"pantry", "saved_searches", "entries", "calls", "records",
)
search_keys = ([sample_key] if sample_key else []) + list(_ENVELOPE_KEYS)
for key in search_keys:
if key in raw and isinstance(raw[key], list): if key in raw and isinstance(raw[key], list):
lst = raw[key] lst = raw[key]
item = lst[min(sample_index, len(lst) - 1)] if lst else {} item = lst[min(sample_index, len(lst) - 1)] if lst else {}
@ -141,24 +193,49 @@ def _sse(data: dict) -> str:
return f"data: {json.dumps(data)}\n\n" return f"data: {json.dumps(data)}\n\n"
def _fetch_image_b64(image_url: str) -> str:
"""Download an image URL and return it as a base64 string for ollama.
Returns empty string on any failure a missing image is non-fatal;
the model will still run against the text prompt alone.
"""
try:
req = Request(image_url, headers={"User-Agent": "Avocet/1.0"})
with urlopen(req, timeout=10) as resp:
return base64.b64encode(resp.read()).decode("ascii")
except Exception as exc:
logger.warning("Failed to fetch image %s: %s", image_url, exc)
return ""
def _run_ollama_streaming( def _run_ollama_streaming(
ollama_base: str, ollama_base: str,
model_id: str, model_id: str,
prompt: str, prompt: str,
temperature: float, temperature: float,
system: str = "",
images: list[str] | None = None,
) -> tuple[str, int]: ) -> tuple[str, int]:
"""Call ollama /api/generate with stream=True; return (full_response, elapsed_ms). """Call ollama /api/generate with stream=False; return (full_response, elapsed_ms).
Blocks until the model finishes; yields nothing streaming is handled by Blocks until the model finishes; yields nothing streaming is handled by
the SSE generator in run_imitate(). the SSE generator in run_imitate().
system: optional system prompt passed as a separate field to ollama.
images: list of base64-encoded image strings (vision models only).
""" """
url = f"{ollama_base.rstrip('/')}/api/generate" url = f"{ollama_base.rstrip('/')}/api/generate"
payload = json.dumps({ body: dict = {
"model": model_id, "model": model_id,
"prompt": prompt, "prompt": prompt,
"stream": False, "stream": False,
"options": {"temperature": temperature}, "options": {"temperature": temperature},
}).encode("utf-8") }
if system:
body["system"] = system
if images:
body["images"] = images
payload = json.dumps(body).encode("utf-8")
req = Request(url, data=payload, method="POST", req = Request(url, data=payload, method="POST",
headers={"Content-Type": "application/json"}) headers={"Content-Type": "application/json"})
t0 = time.time() t0 = time.time()
@ -172,6 +249,122 @@ def _run_ollama_streaming(
raise RuntimeError(str(exc)) from exc raise RuntimeError(str(exc)) from exc
def _run_cftext(
cforch_base: str,
model_id: str,
prompt: str,
system: str,
temperature: float,
startup_timeout_s: float = 180.0,
) -> tuple[str, int, bool]:
"""Allocate cf-text via cf-orch, generate, release. Returns (response, elapsed_ms, cold_started).
Raises RuntimeError on allocation failure or generation error.
cold_started=True means the service was launched from scratch (caller may log this).
Cold-start detection uses coordinator state signals (running/stopped) rather than
polling the service health endpoint this fails fast on model load errors instead
of waiting out the full timeout.
"""
# Allocate
alloc_resp = httpx.post(
f"{cforch_base}/api/services/cf-text/allocate",
json={
"model_candidates": [model_id],
"caller": "avocet",
"pipeline": "imitate",
},
timeout=30.0,
)
alloc_resp.raise_for_status()
data = alloc_resp.json()
service_url: str = data["url"]
allocation_id: str = data.get("allocation_id", "")
node_id: str = data.get("node_id", "")
gpu_id: int | None = data.get("gpu_id")
cold_started = data.get("started", False) and not data.get("warm", True)
# Wait for ready using coordinator state signals
if cold_started:
deadline = time.monotonic() + startup_timeout_s
probe_misses = 0
while time.monotonic() < deadline:
try:
status = httpx.get(
f"{cforch_base}/api/services/cf-text/status", timeout=5.0
)
if status.is_success:
instances = status.json().get("instances", [])
match = next(
(i for i in instances
if i.get("node_id") == node_id and i.get("gpu_id") == gpu_id),
None,
)
if match:
probe_misses = 0
state = match.get("state", "")
if state == "running":
break
elif state == "stopped":
if allocation_id:
httpx.delete(
f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}",
timeout=5.0,
)
raise RuntimeError(f"cf-text failed to load {model_id!r} (service stopped)")
else:
probe_misses += 1
if probe_misses >= 6:
# Coordinator hasn't registered instance yet — fall back to health poll
try:
if httpx.get(f"{service_url}/health", timeout=3.0).is_success:
break
except Exception:
pass
except RuntimeError:
raise
except Exception:
pass
time.sleep(2.0)
else:
if allocation_id:
httpx.delete(f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}", timeout=5.0)
raise RuntimeError(f"cf-text cold start timed out after {startup_timeout_s:.0f}s")
# Generate
messages: list[dict] = []
if system:
messages.append({"role": "system", "content": system})
messages.append({"role": "user", "content": prompt})
t0 = time.time()
try:
gen_resp = httpx.post(
f"{service_url}/v1/chat/completions",
json={
"model": model_id,
"messages": messages,
"max_tokens": 300,
"temperature": temperature,
"stream": False,
},
timeout=120.0,
)
gen_resp.raise_for_status()
elapsed_ms = int((time.time() - t0) * 1000)
content = gen_resp.json()["choices"][0]["message"]["content"]
return content.strip(), elapsed_ms, cold_started
except Exception as exc:
elapsed_ms = int((time.time() - t0) * 1000)
raise RuntimeError(str(exc)) from exc
finally:
if allocation_id:
try:
httpx.delete(f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}", timeout=5.0)
except Exception:
pass
# ── GET /products ────────────────────────────────────────────────────────────── # ── GET /products ──────────────────────────────────────────────────────────────
@router.get("/products") @router.get("/products")
@ -226,52 +419,96 @@ def get_sample(product_id: str, index: int = 0) -> dict:
raise HTTPException(502, f"Bad response from product API: {exc}") from exc raise HTTPException(502, f"Bad response from product API: {exc}") from exc
text_fields = product.get("text_fields", []) or [] text_fields = product.get("text_fields", []) or []
extracted = _extract_sample(raw, text_fields, index) sample_key = product.get("sample_key") or None
extracted = _extract_sample(raw, text_fields, index, sample_key=sample_key)
if not extracted: if not extracted:
raise HTTPException(404, "No sample items returned by product API") raise HTTPException(404, "No sample items returned by product API")
prompt_template = product.get("prompt_template", "{text}") prompt_template = product.get("prompt_template", "{text}")
prompt = prompt_template.replace("{text}", extracted["text"]) prompt = prompt_template.replace("{text}", extracted["text"])
# Also substitute any {field_name} placeholders from the raw item fields.
item = extracted.get("item", {})
for field, val in item.items():
prompt = prompt.replace(f"{{{field}}}", str(val) if val is not None else "")
# Expose system_prompt and image_url if the product API returns them.
# system_prompt: Peregrine, Snipe (vision analysis instructions)
# image_url: Snipe listing photos — Avocet downloads + base64-encodes at run time
item = extracted.get("item", {})
system_prompt = str(item.get("system_prompt", "")) if isinstance(item, dict) else ""
image_url = str(item.get("image_url", "")) if isinstance(item, dict) else ""
return { return {
"product_id": product_id, "product_id": product_id,
"sample_index": index, "sample_index": index,
"text": extracted["text"], "text": extracted["text"],
"prompt": prompt, "prompt": prompt,
"raw_item": extracted.get("item", {}), "system_prompt": system_prompt,
"image_url": image_url,
"raw_item": item,
} }
# ── GET /catalog ───────────────────────────────────────────────────────────────
@router.get("/catalog")
def get_catalog() -> dict:
"""Return the live cf-text model catalog from cf-orch coordinator."""
models = _cforch_catalog(_cforch_url())
return {"models": models}
# ── GET /run (SSE) ───────────────────────────────────────────────────────────── # ── GET /run (SSE) ─────────────────────────────────────────────────────────────
@router.get("/run") @router.get("/run")
def run_imitate( def run_imitate(
prompt: str = "", prompt: str = "",
model_ids: str = "", # comma-separated ollama model IDs model_ids: str = "", # comma-separated ollama model IDs
cf_text_model_ids: str = "", # comma-separated cf-text model IDs (via cf-orch)
temperature: float = 0.7, temperature: float = 0.7,
product_id: str = "", product_id: str = "",
system: str = "", # optional system prompt
image_url: str = "", # optional image URL for vision models
) -> StreamingResponse: ) -> StreamingResponse:
"""Run a prompt through selected ollama models and stream results as SSE.""" """Run a prompt through selected ollama models and stream results as SSE.
If image_url is provided, the image is downloaded once and passed to every
model as a base64-encoded blob allowing vision-capable local models to
evaluate listing photos the same way Snipe's background task pipeline does.
"""
if not prompt.strip(): if not prompt.strip():
raise HTTPException(422, "prompt is required") raise HTTPException(422, "prompt is required")
ids = [m.strip() for m in model_ids.split(",") if m.strip()] ollama_ids = [m.strip() for m in model_ids.split(",") if m.strip()]
if not ids: cftext_ids = [m.strip() for m in cf_text_model_ids.split(",") if m.strip()]
raise HTTPException(422, "model_ids is required") if not ollama_ids and not cftext_ids:
raise HTTPException(422, "model_ids or cf_text_model_ids is required")
cfg = _load_imitate_config() cfg = _load_imitate_config()
ollama_base = _ollama_url(cfg) ollama_base = _ollama_url(cfg)
cforch_base = _cforch_url()
system_ctx = system.strip() or ""
total_models = len(ollama_ids) + len(cftext_ids)
# Download image once before streaming — shared across ollama vision models
images: list[str] = []
if image_url.strip():
b64 = _fetch_image_b64(image_url.strip())
if b64:
images = [b64]
def generate(): def generate():
results: list[dict] = [] results: list[dict] = []
yield _sse({"type": "start", "total_models": len(ids)}) yield _sse({"type": "start", "total_models": total_models, "has_image": bool(images)})
for model_id in ids: # Ollama models
yield _sse({"type": "model_start", "model": model_id}) for model_id in ollama_ids:
yield _sse({"type": "model_start", "model": model_id, "service": "ollama"})
try: try:
response, elapsed_ms = _run_ollama_streaming( response, elapsed_ms = _run_ollama_streaming(
ollama_base, model_id, prompt, temperature ollama_base, model_id, prompt, temperature,
system=system_ctx, images=images or None,
) )
result = { result = {
"model": model_id, "model": model_id,
@ -289,6 +526,41 @@ def run_imitate(
results.append(result) results.append(result)
yield _sse({"type": "model_done", **result}) yield _sse({"type": "model_done", **result})
# cf-text models via cf-orch — fan out in parallel when multiple models selected
if cftext_ids:
from concurrent.futures import ThreadPoolExecutor, as_completed
# Announce all models upfront so the UI can show loading states immediately
for model_id in cftext_ids:
yield _sse({"type": "model_start", "model": model_id, "service": "cf-text"})
with ThreadPoolExecutor(max_workers=len(cftext_ids)) as pool:
future_to_model = {
pool.submit(_run_cftext, cforch_base, mid, prompt, system_ctx, temperature): mid
for mid in cftext_ids
}
for future in as_completed(future_to_model):
model_id = future_to_model[future]
try:
response, elapsed_ms, cold_started = future.result()
if cold_started:
yield _sse({"type": "model_coldstart", "model": model_id})
result = {
"model": model_id,
"response": response,
"elapsed_ms": elapsed_ms,
"error": None,
}
except Exception as exc:
result = {
"model": model_id,
"response": "",
"elapsed_ms": 0,
"error": str(exc),
}
results.append(result)
yield _sse({"type": "model_done", **result})
yield _sse({"type": "complete", "results": results}) yield _sse({"type": "complete", "results": results})
return StreamingResponse( return StreamingResponse(

View file

@ -49,12 +49,30 @@
<div v-if="sampleLoading" class="picker-loading">Fetching sample from API</div> <div v-if="sampleLoading" class="picker-loading">Fetching sample from API</div>
<template v-else-if="rawSample"> <template v-else-if="rawSample">
<!-- Fetched text preview --> <!-- Listing image thumbnail (Snipe vision samples) -->
<details class="sample-preview" open> <div v-if="imageUrl" class="sample-image-row">
<img :src="imageUrl" class="sample-image-thumb" alt="Listing photo" @error="imageUrl = ''" />
<span class="image-badge">📷 image will be sent to vision models</span>
</div>
<!-- Fetched text preview (hidden when prompt_template is {input_text} with no text_fields) -->
<details v-if="rawSample.text" class="sample-preview" open>
<summary class="sample-preview-toggle">Raw sample text</summary> <summary class="sample-preview-toggle">Raw sample text</summary>
<pre class="sample-text">{{ rawSample.text }}</pre> <pre class="sample-text">{{ rawSample.text }}</pre>
</details> </details>
<!-- System context (shown only when the product provides one) -->
<template v-if="systemPrompt">
<details class="sample-preview">
<summary class="sample-preview-toggle">System context <span class="system-badge">sent separately to model</span></summary>
<textarea
class="prompt-editor system-editor"
v-model="systemPrompt"
rows="4"
/>
</details>
</template>
<!-- Prompt editor --> <!-- Prompt editor -->
<label class="prompt-label" for="prompt-editor">Prompt sent to models</label> <label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
<textarea <textarea
@ -112,6 +130,42 @@
</div> </div>
</details> </details>
<!-- cf-text model picker (live catalog from cf-orch) -->
<details class="model-picker">
<summary class="picker-summary">
<span class="picker-title"> cf-text Models <span class="cforch-badge">via cf-orch</span></span>
<span class="picker-badge">{{ selectedCfTextModels.size }} / {{ cfTextCatalog.length }}</span>
</summary>
<div class="picker-body">
<div v-if="catalogLoading" class="picker-loading">Loading catalog from cf-orch</div>
<div v-else-if="cfTextCatalog.length === 0" class="picker-empty">
No cf-text models available check cf-orch coordinator is running.
</div>
<template v-else>
<label class="picker-cat-header">
<input
type="checkbox"
:checked="selectedCfTextModels.size === cfTextCatalog.length"
:indeterminate="selectedCfTextModels.size > 0 && selectedCfTextModels.size < cfTextCatalog.length"
@change="toggleAllCfText(($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">All cf-text models</span>
</label>
<div class="picker-model-list">
<label v-for="m in cfTextCatalog" :key="m.id" class="picker-model-row">
<input
type="checkbox"
:checked="selectedCfTextModels.has(m.id)"
@change="toggleCfText(m.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name" :title="m.description || m.id">{{ m.id }}</span>
<span v-if="m.vram_mb" class="tag">{{ Math.round(m.vram_mb / 1024 * 10) / 10 }}GB</span>
</label>
</div>
</template>
</div>
</details>
<!-- Temperature --> <!-- Temperature -->
<div class="temp-row"> <div class="temp-row">
<label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label> <label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
@ -128,7 +182,7 @@
<div class="run-row"> <div class="run-row">
<button <button
class="btn-run" class="btn-run"
:disabled="running || selectedModels.size === 0" :disabled="running || (selectedModels.size === 0 && selectedCfTextModels.size === 0)"
@click="startRun" @click="startRun"
> >
{{ running ? '⏳ Running…' : '▶ Run' }} {{ running ? '⏳ Running…' : '▶ Run' }}
@ -204,6 +258,8 @@ interface Sample {
sample_index: number sample_index: number
text: string text: string
prompt: string prompt: string
system_prompt: string
image_url: string
raw_item: Record<string, unknown> raw_item: Record<string, unknown>
} }
@ -215,6 +271,12 @@ interface ModelEntry {
vram_estimate_mb: number vram_estimate_mb: number
} }
interface CatalogEntry {
id: string
vram_mb: number
description: string
}
interface RunResult { interface RunResult {
model: string model: string
response: string response: string
@ -232,11 +294,17 @@ const sampleLoading = ref(false)
const sampleError = ref<string | null>(null) const sampleError = ref<string | null>(null)
const rawSample = ref<Sample | null>(null) const rawSample = ref<Sample | null>(null)
const editedPrompt = ref('') const editedPrompt = ref('')
const systemPrompt = ref('')
const imageUrl = ref('')
const modelsLoading = ref(false) const modelsLoading = ref(false)
const allModels = ref<ModelEntry[]>([]) const allModels = ref<ModelEntry[]>([])
const selectedModels = ref<Set<string>>(new Set()) const selectedModels = ref<Set<string>>(new Set())
const catalogLoading = ref(false)
const cfTextCatalog = ref<CatalogEntry[]>([])
const selectedCfTextModels = ref<Set<string>>(new Set())
const temperature = ref(0.7) const temperature = ref(0.7)
const running = ref(false) const running = ref(false)
@ -261,7 +329,7 @@ const successfulResults = computed(() =>
// Lifecycle // Lifecycle
onMounted(async () => { onMounted(async () => {
await Promise.all([loadProducts(), loadModels()]) await Promise.all([loadProducts(), loadModels(), loadCfTextCatalog()])
}) })
// Methods // Methods
@ -298,10 +366,38 @@ async function loadModels() {
} }
} }
async function loadCfTextCatalog() {
catalogLoading.value = true
try {
const resp = await fetch('/api/imitate/catalog')
if (!resp.ok) throw new Error(`HTTP ${resp.status}`)
const data = await resp.json()
cfTextCatalog.value = data.models ?? []
} catch {
cfTextCatalog.value = []
} finally {
catalogLoading.value = false
}
}
function toggleCfText(id: string, checked: boolean) {
const next = new Set(selectedCfTextModels.value)
checked ? next.add(id) : next.delete(id)
selectedCfTextModels.value = next
}
function toggleAllCfText(checked: boolean) {
selectedCfTextModels.value = checked
? new Set(cfTextCatalog.value.map(m => m.id))
: new Set()
}
async function selectProduct(p: Product) { async function selectProduct(p: Product) {
selectedProduct.value = p selectedProduct.value = p
rawSample.value = null rawSample.value = null
editedPrompt.value = '' editedPrompt.value = ''
systemPrompt.value = ''
imageUrl.value = ''
sampleError.value = null sampleError.value = null
results.value = [] results.value = []
runLog.value = [] runLog.value = []
@ -321,6 +417,8 @@ async function fetchSample() {
const data: Sample = await resp.json() const data: Sample = await resp.json()
rawSample.value = data rawSample.value = data
editedPrompt.value = data.prompt editedPrompt.value = data.prompt
systemPrompt.value = data.system_prompt ?? ''
imageUrl.value = data.image_url ?? ''
} catch (err: unknown) { } catch (err: unknown) {
sampleError.value = err instanceof Error ? err.message : String(err) sampleError.value = err instanceof Error ? err.message : String(err)
} finally { } finally {
@ -341,7 +439,8 @@ function toggleAllModels(checked: boolean) {
} }
function startRun() { function startRun() {
if (running.value || !editedPrompt.value.trim() || selectedModels.value.size === 0) return const hasModels = selectedModels.value.size > 0 || selectedCfTextModels.value.size > 0
if (running.value || !editedPrompt.value.trim() || !hasModels) return
running.value = true running.value = true
results.value = [] results.value = []
@ -349,10 +448,13 @@ function startRun() {
correctionsPushMsg.value = null correctionsPushMsg.value = null
const params = new URLSearchParams({ const params = new URLSearchParams({
prompt: editedPrompt.value, prompt: editedPrompt.value,
model_ids: [...selectedModels.value].join(','), model_ids: [...selectedModels.value].join(','),
temperature: temperature.value.toString(), cf_text_model_ids: [...selectedCfTextModels.value].join(','),
product_id: selectedProduct.value?.id ?? '', temperature: temperature.value.toString(),
product_id: selectedProduct.value?.id ?? '',
system: systemPrompt.value,
image_url: imageUrl.value,
}) })
const es = new EventSource(`/api/imitate/run?${params}`) const es = new EventSource(`/api/imitate/run?${params}`)
@ -362,9 +464,13 @@ function startRun() {
try { try {
const msg = JSON.parse(event.data) const msg = JSON.parse(event.data)
if (msg.type === 'start') { if (msg.type === 'start') {
runLog.value.push(`Running ${msg.total_models} model(s)…`) const imgNote = msg.has_image ? ' (with image)' : ''
runLog.value.push(`Running ${msg.total_models} model(s)${imgNote}`)
} else if (msg.type === 'model_start') { } else if (msg.type === 'model_start') {
runLog.value.push(`${msg.model}`) const svc = msg.service === 'cf-text' ? ' [cf-text]' : ''
runLog.value.push(`${msg.model}${svc}`)
} else if (msg.type === 'model_coldstart') {
runLog.value.push(`${msg.model}: cold start — waiting for service to load…`)
} else if (msg.type === 'model_done') { } else if (msg.type === 'model_done') {
const status = msg.error const status = msg.error
? `✕ error: ${msg.error}` ? `✕ error: ${msg.error}`
@ -586,6 +692,46 @@ async function pushCorrections() {
color: var(--color-text, #1a2338); color: var(--color-text, #1a2338);
} }
.sample-image-row {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.sample-image-thumb {
width: 120px;
height: 90px;
object-fit: cover;
border-radius: 0.375rem;
border: 1px solid var(--color-border, #d0d7e8);
flex-shrink: 0;
}
.image-badge {
font-size: 0.78rem;
color: var(--color-text-secondary, #6b7a99);
}
.system-badge {
font-size: 0.68rem;
background: color-mix(in srgb, var(--app-primary, #2A6080) 15%, transparent);
color: var(--app-primary, #2A6080);
border-radius: 9999px;
padding: 0.1rem 0.5rem;
margin-left: 0.4rem;
font-weight: 600;
vertical-align: middle;
}
.system-editor {
border-top: 1px solid var(--color-border, #d0d7e8);
border-radius: 0;
border-left: none;
border-right: none;
border-bottom: none;
}
.prompt-label { .prompt-label {
font-size: 0.85rem; font-size: 0.85rem;
font-weight: 600; font-weight: 600;
@ -895,4 +1041,15 @@ async function pushCorrections() {
.msg-ok { color: #065f46; } .msg-ok { color: #065f46; }
.msg-err { color: #b91c1c; } .msg-err { color: #b91c1c; }
.cforch-badge {
font-size: 0.68rem;
background: color-mix(in srgb, var(--app-accent, #059669) 18%, transparent);
color: var(--app-accent, #059669);
border-radius: 9999px;
padding: 0.1rem 0.5rem;
margin-left: 0.4rem;
font-weight: 600;
vertical-align: middle;
}
</style> </style>