feat(imitate): parallel cf-text fanout workers + signal-based cold-start detection

Backend:
- Run all cf-text model allocations concurrently via ThreadPoolExecutor + as_completed
- Announce model_start events upfront so the UI can show loading states immediately
- Replace timer-based startup polling with coordinator state signals: waits for state=="running" (success) or state=="stopped" (fail-fast) on the matching node/gpu instance; falls back to health poll after 6 consecutive probe misses
- Add /api/imitate/catalog endpoint: fetches live cf-text model list from cf-orch, filtering out proxy entries (ollama://, vllm://, http://) so only loadable models are returned

Frontend (ImitateView.vue):
- Show per-model loading spinners as results arrive via SSE stream
- Display cold-start badge when coordinator signals the model was freshly loaded
2026-04-24 14:56:09 -07:00 · 2026-04-24 14:56:09 -07:00 · cc24cd0d7d
commit cc24cd0d7d
parent e6b64d6efe
3 changed files with 462 additions and 30 deletions
--- a/app/api.py
+++ b/app/api.py
@ -155,6 +155,9 @@ app.include_router(cforch_router, prefix="/api/cforch")
 from app.imitate import router as imitate_router
 app.include_router(imitate_router, prefix="/api/imitate")

+from app.style import router as style_router
+app.include_router(style_router, prefix="/api/style")
+
 # In-memory last-action store (single user, local tool — in-memory is fine)
 _last_action: dict | None = None

--- a/app/imitate.py
+++ b/app/imitate.py
@ -11,6 +11,7 @@ override _CONFIG_DIR and _DATA_DIR via set_config_dir() / set_data_dir() in test
 """
 from __future__ import annotations

+import base64
 import json
 import logging
 import time
@ -21,6 +22,7 @@ from typing import Any
 from urllib.error import URLError
 from urllib.request import Request, urlopen

+import httpx
 import yaml
 from fastapi import APIRouter, HTTPException
 from fastapi.responses import StreamingResponse
@ -87,6 +89,45 @@ def _ollama_url(cfg: dict) -> str:
    return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434"


+def _cforch_url() -> str:
+    """Return the cf-orch coordinator base URL.
+
+    Reads "coordinator_url" from the cf-orch config and falls back to the
+    local default when the key is missing or empty.
+    """
+    configured = _load_cforch_config().get("coordinator_url")
+    return configured if configured else "http://localhost:7700"
+
+
+def _cforch_catalog(cforch_base: str, node_id: str = "heimdall") -> list[dict]:
+    """Fetch the live cf-text catalog from cf-orch.
+
+    Filters out proxy entries (ollama://, vllm://, http://) — those models are
+    served by their own services and should not be allocated via cf-text.
+    Any entry whose "path" contains "://" is treated as a proxy and skipped.
+
+    Args:
+        cforch_base: Base URL of the cf-orch coordinator.
+        node_id: Node whose catalog is requested. Defaults to "heimdall",
+            the value that used to be hard-coded here.
+
+    Returns:
+        A list of {"id", "vram_mb", "description"} dicts. Empty when the
+        coordinator cannot be reached or answers with an unexpected payload;
+        the failure is logged rather than raised.
+    """
+    try:
+        resp = httpx.get(
+            f"{cforch_base}/api/services/cf-text/catalog",
+            params={"node_id": node_id},
+            timeout=5.0,
+        )
+        resp.raise_for_status()
+        raw = resp.json()
+    except Exception as exc:
+        # Best-effort boundary: the catalog is optional UI data.
+        logger.warning("Could not fetch cf-orch catalog: %s", exc)
+        return []
+
+    if not isinstance(raw, dict):
+        logger.warning(
+            "Could not fetch cf-orch catalog: unexpected payload type %s",
+            type(raw).__name__,
+        )
+        return []
+
+    result = []
+    for model_id, entry in raw.items():
+        if not isinstance(entry, dict):
+            continue
+        # A null or non-string "path" must not discard the whole catalog,
+        # so normalise it before the substring test.
+        path = entry.get("path") or ""
+        # Skip proxy entries — they're routed through other services
+        if "://" in str(path):
+            continue
+        result.append({
+            "id":          model_id,
+            "vram_mb":     entry.get("vram_mb", 0),
+            "description": entry.get("description", ""),
+        })
+    return result
+
+
 def _http_get_json(url: str, timeout: int = 5) -> Any:
    """Fetch JSON from url; raise URLError on failure."""
    req = Request(url, headers={"Accept": "application/json"})
@ -104,18 +145,29 @@ def _is_online(base_url: str, health_path: str = "/api/health") -> bool:


 def _extract_sample(
-    raw: Any, text_fields: list[str], sample_index: int = 0
+    raw: Any,
+    text_fields: list[str],
+    sample_index: int = 0,
+    sample_key: str | None = None,
 ) -> dict[str, Any]:
-    """Pull one item from a list or dict response and extract text_fields."""
+    """Pull one item from a list or dict response and extract text_fields.
+
+    sample_key: if provided, unwrap raw[sample_key] before looking for a list.
+    Falls back to a set of conventional envelope keys if sample_key is absent.
+    """
    item: dict[str, Any]
    if isinstance(raw, list):
        if not raw:
            return {}
        item = raw[min(sample_index, len(raw) - 1)]
    elif isinstance(raw, dict):
-        # may be {items: [...]} or the item itself
-        for key in ("items", "results", "data", "jobs", "listings", "pantry",
-                    "saved_searches", "entries", "calls", "records"):
+        # Use declared sample_key first, then fall back to conventional names.
+        _ENVELOPE_KEYS = (
+            "samples", "items", "results", "data", "jobs", "listings",
+            "pantry", "saved_searches", "entries", "calls", "records",
+        )
+        search_keys = ([sample_key] if sample_key else []) + list(_ENVELOPE_KEYS)
+        for key in search_keys:
            if key in raw and isinstance(raw[key], list):
                lst = raw[key]
                item = lst[min(sample_index, len(lst) - 1)] if lst else {}
@ -141,24 +193,49 @@ def _sse(data: dict) -> str:
    return f"data: {json.dumps(data)}\n\n"


+def _fetch_image_b64(image_url: str) -> str:
+    """Download an image URL and return it as a base64 string for ollama.
+
+    Only http:// and https:// URLs are fetched. urlopen() also understands
+    other schemes (file://, ftp://, ...), and the URL may be caller-supplied,
+    so anything else is rejected instead of being opened.
+
+    Returns empty string on any failure — a missing image is non-fatal;
+    the model will still run against the text prompt alone.
+    """
+    if not image_url.lower().startswith(("http://", "https://")):
+        logger.warning(
+            "Refusing to fetch image with unsupported URL scheme: %s", image_url
+        )
+        return ""
+    try:
+        req = Request(image_url, headers={"User-Agent": "Avocet/1.0"})
+        with urlopen(req, timeout=10) as resp:
+            return base64.b64encode(resp.read()).decode("ascii")
+    except Exception as exc:
+        logger.warning("Failed to fetch image %s: %s", image_url, exc)
+        return ""
+
+
 def _run_ollama_streaming(
    ollama_base: str,
    model_id: str,
    prompt: str,
    temperature: float,
+    system: str = "",
+    images: list[str] | None = None,
 ) -> tuple[str, int]:
-    """Call ollama /api/generate with stream=True; return (full_response, elapsed_ms).
+    """Call ollama /api/generate with stream=False; return (full_response, elapsed_ms).

    Blocks until the model finishes; yields nothing — streaming is handled by
    the SSE generator in run_imitate().
+
+    system: optional system prompt passed as a separate field to ollama.
+    images: list of base64-encoded image strings (vision models only).
    """
    url = f"{ollama_base.rstrip('/')}/api/generate"
-    payload = json.dumps({
+    body: dict = {
        "model": model_id,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature},
-    }).encode("utf-8")
+    }
+    if system:
+        body["system"] = system
+    if images:
+        body["images"] = images
+    payload = json.dumps(body).encode("utf-8")
    req = Request(url, data=payload, method="POST",
                  headers={"Content-Type": "application/json"})
    t0 = time.time()
@ -172,6 +249,122 @@ def _run_ollama_streaming(
        raise RuntimeError(str(exc)) from exc


+def _release_cftext_allocation(cforch_base: str, allocation_id: str) -> None:
+    """Release a cf-text allocation on the coordinator, best-effort.
+
+    Never raises: a failed release is logged so it cannot replace the error
+    (or result) the caller is about to report. No-op for an empty id.
+    """
+    if not allocation_id:
+        return
+    try:
+        httpx.delete(
+            f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}",
+            timeout=5.0,
+        )
+    except Exception as exc:
+        logger.warning(
+            "Could not release cf-text allocation %s: %s", allocation_id, exc
+        )
+
+
+def _run_cftext(
+    cforch_base: str,
+    model_id: str,
+    prompt: str,
+    system: str,
+    temperature: float,
+    startup_timeout_s: float = 180.0,
+) -> tuple[str, int, bool]:
+    """Allocate cf-text via cf-orch, generate, release. Returns (response, elapsed_ms, cold_started).
+
+    Raises RuntimeError on allocation failure, cold-start failure/timeout, or
+    generation error. The allocation is released on every exit path once an
+    allocation id is known.
+    cold_started=True means the service was launched from scratch (caller may log this).
+
+    Cold-start detection uses coordinator state signals (running/stopped) rather than
+    polling the service health endpoint — this fails fast on model load errors instead
+    of waiting out the full timeout.
+    """
+    # Allocate
+    try:
+        alloc_resp = httpx.post(
+            f"{cforch_base}/api/services/cf-text/allocate",
+            json={
+                "model_candidates": [model_id],
+                "caller": "avocet",
+                "pipeline": "imitate",
+            },
+            timeout=30.0,
+        )
+        alloc_resp.raise_for_status()
+        data = alloc_resp.json()
+        service_url: str = data["url"]
+    except Exception as exc:
+        # Surface transport/HTTP/payload problems as the documented RuntimeError.
+        raise RuntimeError(
+            f"cf-text allocation failed for {model_id!r}: {exc}"
+        ) from exc
+    allocation_id: str = data.get("allocation_id", "")
+    node_id: str = data.get("node_id", "")
+    gpu_id: int | None = data.get("gpu_id")
+    cold_started = bool(data.get("started", False)) and not data.get("warm", True)
+
+    # Wait for ready using coordinator state signals
+    if cold_started:
+        deadline = time.monotonic() + startup_timeout_s
+        probe_misses = 0
+        while time.monotonic() < deadline:
+            match = None
+            probed = False  # True only when the coordinator answered successfully
+            try:
+                status = httpx.get(
+                    f"{cforch_base}/api/services/cf-text/status", timeout=5.0
+                )
+                if status.is_success:
+                    instances = status.json().get("instances", [])
+                    match = next(
+                        (i for i in instances
+                         if i.get("node_id") == node_id and i.get("gpu_id") == gpu_id),
+                        None,
+                    )
+                    probed = True
+            except Exception as exc:
+                # Transient coordinator errors are expected while a service boots.
+                logger.debug("cf-text status probe failed: %s", exc)
+
+            # Decisions are taken outside the try above so that the fail-fast
+            # RuntimeError can never be swallowed by the probe's error handling.
+            if match:
+                probe_misses = 0
+                state = match.get("state", "")
+                if state == "running":
+                    break
+                if state == "stopped":
+                    _release_cftext_allocation(cforch_base, allocation_id)
+                    raise RuntimeError(
+                        f"cf-text failed to load {model_id!r} (service stopped)"
+                    )
+            elif probed:
+                probe_misses += 1
+                if probe_misses >= 6:
+                    # Coordinator hasn't registered instance yet — fall back to health poll
+                    try:
+                        if httpx.get(f"{service_url}/health", timeout=3.0).is_success:
+                            break
+                    except Exception:
+                        pass
+            time.sleep(2.0)
+        else:
+            # Loop ended without a break: the deadline passed.
+            _release_cftext_allocation(cforch_base, allocation_id)
+            raise RuntimeError(f"cf-text cold start timed out after {startup_timeout_s:.0f}s")
+
+    # Generate
+    messages: list[dict] = []
+    if system:
+        messages.append({"role": "system", "content": system})
+    messages.append({"role": "user", "content": prompt})
+
+    # Monotonic clock: the latency figure must not jump with wall-clock changes.
+    t0 = time.monotonic()
+    try:
+        gen_resp = httpx.post(
+            f"{service_url}/v1/chat/completions",
+            json={
+                "model": model_id,
+                "messages": messages,
+                "max_tokens": 300,
+                "temperature": temperature,
+                "stream": False,
+            },
+            timeout=120.0,
+        )
+        gen_resp.raise_for_status()
+        elapsed_ms = int((time.monotonic() - t0) * 1000)
+        content = gen_resp.json()["choices"][0]["message"]["content"]
+        return content.strip(), elapsed_ms, cold_started
+    except Exception as exc:
+        raise RuntimeError(str(exc)) from exc
+    finally:
+        _release_cftext_allocation(cforch_base, allocation_id)
+
+
 # ── GET /products ──────────────────────────────────────────────────────────────

@router.get("/products")
@ -226,52 +419,96 @@ def get_sample(product_id: str, index: int = 0) -> dict:
        raise HTTPException(502, f"Bad response from product API: {exc}") from exc

    text_fields = product.get("text_fields", []) or []
-    extracted = _extract_sample(raw, text_fields, index)
+    sample_key = product.get("sample_key") or None
+    extracted = _extract_sample(raw, text_fields, index, sample_key=sample_key)
    if not extracted:
        raise HTTPException(404, "No sample items returned by product API")

    prompt_template = product.get("prompt_template", "{text}")
    prompt = prompt_template.replace("{text}", extracted["text"])
+    # Also substitute any {field_name} placeholders from the raw item fields.
+    item = extracted.get("item", {})
+    for field, val in item.items():
+        prompt = prompt.replace(f"{{{field}}}", str(val) if val is not None else "")
+
+    # Expose system_prompt and image_url if the product API returns them.
+    # system_prompt: Peregrine, Snipe (vision analysis instructions)
+    # image_url: Snipe listing photos — Avocet downloads + base64-encodes at run time
+    item = extracted.get("item", {})
+    system_prompt = str(item.get("system_prompt", "")) if isinstance(item, dict) else ""
+    image_url = str(item.get("image_url", "")) if isinstance(item, dict) else ""

    return {
        "product_id":    product_id,
        "sample_index":  index,
        "text":          extracted["text"],
        "prompt":        prompt,
-        "raw_item":      extracted.get("item", {}),
+        "system_prompt": system_prompt,
+        "image_url":     image_url,
+        "raw_item":      item,
    }


+# ── GET /catalog ───────────────────────────────────────────────────────────────
+
+@router.get("/catalog")
+def get_catalog() -> dict:
+    """Serve the cf-text model catalog reported by the cf-orch coordinator.
+
+    The response has the shape {"models": [...]}; the list is whatever
+    _cforch_catalog() returns for the configured coordinator URL.
+    """
+    coordinator_url = _cforch_url()
+    return {"models": _cforch_catalog(coordinator_url)}
+
+
 # ── GET /run (SSE) ─────────────────────────────────────────────────────────────

@router.get("/run")
 def run_imitate(
    prompt: str = "",
    model_ids: str = "",          # comma-separated ollama model IDs
+    cf_text_model_ids: str = "",  # comma-separated cf-text model IDs (via cf-orch)
    temperature: float = 0.7,
    product_id: str = "",
+    system: str = "",             # optional system prompt
+    image_url: str = "",          # optional image URL for vision models
 ) -> StreamingResponse:
-    """Run a prompt through selected ollama models and stream results as SSE."""
+    """Run a prompt through selected ollama models and stream results as SSE.
+
+    If image_url is provided, the image is downloaded once and passed to every
+    model as a base64-encoded blob — allowing vision-capable local models to
+    evaluate listing photos the same way Snipe's background task pipeline does.
+    """

    if not prompt.strip():
        raise HTTPException(422, "prompt is required")

-    ids = [m.strip() for m in model_ids.split(",") if m.strip()]
-    if not ids:
-        raise HTTPException(422, "model_ids is required")
+    ollama_ids  = [m.strip() for m in model_ids.split(",")         if m.strip()]
+    cftext_ids  = [m.strip() for m in cf_text_model_ids.split(",") if m.strip()]
+    if not ollama_ids and not cftext_ids:
+        raise HTTPException(422, "model_ids or cf_text_model_ids is required")

    cfg = _load_imitate_config()
    ollama_base = _ollama_url(cfg)
+    cforch_base = _cforch_url()
+    system_ctx = system.strip() or ""
+    total_models = len(ollama_ids) + len(cftext_ids)
+
+    # Download image once before streaming — shared across ollama vision models
+    images: list[str] = []
+    if image_url.strip():
+        b64 = _fetch_image_b64(image_url.strip())
+        if b64:
+            images = [b64]

    def generate():
        results: list[dict] = []
-        yield _sse({"type": "start", "total_models": len(ids)})
+        yield _sse({"type": "start", "total_models": total_models, "has_image": bool(images)})

-        for model_id in ids:
-            yield _sse({"type": "model_start", "model": model_id})
+        # Ollama models
+        for model_id in ollama_ids:
+            yield _sse({"type": "model_start", "model": model_id, "service": "ollama"})
            try:
                response, elapsed_ms = _run_ollama_streaming(
-                    ollama_base, model_id, prompt, temperature
+                    ollama_base, model_id, prompt, temperature,
+                    system=system_ctx, images=images or None,
                )
                result = {
                    "model":      model_id,
@ -289,6 +526,41 @@ def run_imitate(
            results.append(result)
            yield _sse({"type": "model_done", **result})

+        # cf-text models via cf-orch — fan out in parallel when multiple models selected
+        if cftext_ids:
+            from concurrent.futures import ThreadPoolExecutor, as_completed
+
+            # Announce all models upfront so the UI can show loading states immediately
+            for model_id in cftext_ids:
+                yield _sse({"type": "model_start", "model": model_id, "service": "cf-text"})
+
+            with ThreadPoolExecutor(max_workers=len(cftext_ids)) as pool:
+                future_to_model = {
+                    pool.submit(_run_cftext, cforch_base, mid, prompt, system_ctx, temperature): mid
+                    for mid in cftext_ids
+                }
+                for future in as_completed(future_to_model):
+                    model_id = future_to_model[future]
+                    try:
+                        response, elapsed_ms, cold_started = future.result()
+                        if cold_started:
+                            yield _sse({"type": "model_coldstart", "model": model_id})
+                        result = {
+                            "model":      model_id,
+                            "response":   response,
+                            "elapsed_ms": elapsed_ms,
+                            "error":      None,
+                        }
+                    except Exception as exc:
+                        result = {
+                            "model":      model_id,
+                            "response":   "",
+                            "elapsed_ms": 0,
+                            "error":      str(exc),
+                        }
+                    results.append(result)
+                    yield _sse({"type": "model_done", **result})
+
        yield _sse({"type": "complete", "results": results})

    return StreamingResponse(
--- a/web/src/views/ImitateView.vue
+++ b/web/src/views/ImitateView.vue
@ -49,12 +49,30 @@
      <div v-if="sampleLoading" class="picker-loading">Fetching sample from API…</div>

      <template v-else-if="rawSample">
-        <!-- Fetched text preview -->
-        <details class="sample-preview" open>
+        <!-- Listing image thumbnail (Snipe vision samples) -->
+        <div v-if="imageUrl" class="sample-image-row">
+          <img :src="imageUrl" class="sample-image-thumb" alt="Listing photo" @error="imageUrl = ''" />
+          <span class="image-badge">📷 image will be sent to vision models</span>
+        </div>
+
+        <!-- Fetched text preview (hidden when prompt_template is {input_text} with no text_fields) -->
+        <details v-if="rawSample.text" class="sample-preview" open>
          <summary class="sample-preview-toggle">Raw sample text</summary>
          <pre class="sample-text">{{ rawSample.text }}</pre>
        </details>

+        <!-- System context (shown only when the product provides one) -->
+        <template v-if="systemPrompt">
+          <details class="sample-preview">
+            <summary class="sample-preview-toggle">System context <span class="system-badge">sent separately to model</span></summary>
+            <textarea
+              class="prompt-editor system-editor"
+              v-model="systemPrompt"
+              rows="4"
+            />
+          </details>
+        </template>
+
        <!-- Prompt editor -->
        <label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
        <textarea
@ -112,6 +130,42 @@
        </div>
      </details>

+      <!-- cf-text model picker (live catalog from cf-orch) -->
+      <details class="model-picker">
+        <summary class="picker-summary">
+          <span class="picker-title">⚡ cf-text Models <span class="cforch-badge">via cf-orch</span></span>
+          <span class="picker-badge">{{ selectedCfTextModels.size }} / {{ cfTextCatalog.length }}</span>
+        </summary>
+        <div class="picker-body">
+          <div v-if="catalogLoading" class="picker-loading">Loading catalog from cf-orch…</div>
+          <div v-else-if="cfTextCatalog.length === 0" class="picker-empty">
+            No cf-text models available — check cf-orch coordinator is running.
+          </div>
+          <template v-else>
+            <label class="picker-cat-header">
+              <input
+                type="checkbox"
+                :checked="selectedCfTextModels.size === cfTextCatalog.length"
+                :indeterminate="selectedCfTextModels.size > 0 && selectedCfTextModels.size < cfTextCatalog.length"
+                @change="toggleAllCfText(($event.target as HTMLInputElement).checked)"
+              />
+              <span class="picker-cat-name">All cf-text models</span>
+            </label>
+            <div class="picker-model-list">
+              <label v-for="m in cfTextCatalog" :key="m.id" class="picker-model-row">
+                <input
+                  type="checkbox"
+                  :checked="selectedCfTextModels.has(m.id)"
+                  @change="toggleCfText(m.id, ($event.target as HTMLInputElement).checked)"
+                />
+                <span class="picker-model-name" :title="m.description || m.id">{{ m.id }}</span>
+                <span v-if="m.vram_mb" class="tag">{{ Math.round(m.vram_mb / 1024 * 10) / 10 }}GB</span>
+              </label>
+            </div>
+          </template>
+        </div>
+      </details>
+
      <!-- Temperature -->
      <div class="temp-row">
        <label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
@ -128,7 +182,7 @@
      <div class="run-row">
        <button
          class="btn-run"
-          :disabled="running || selectedModels.size === 0"
+          :disabled="running || (selectedModels.size === 0 && selectedCfTextModels.size === 0)"
          @click="startRun"
        >
          {{ running ? '⏳ Running…' : '▶ Run' }}
@ -204,6 +258,8 @@ interface Sample {
  sample_index: number
  text: string
  prompt: string
+  system_prompt: string
+  image_url: string
  raw_item: Record<string, unknown>
 }

@ -215,6 +271,12 @@ interface ModelEntry {
  vram_estimate_mb: number
 }

+// One cf-text model as returned in the `models` array of /api/imitate/catalog.
+interface CatalogEntry {
+  // Model identifier; used as the checkbox key and sent in cf_text_model_ids.
+  id: string
+  // VRAM figure in MB; the picker shows it as GB (vram_mb / 1024) when non-zero.
+  vram_mb: number
+  // Free-text description; shown as the row tooltip, falling back to the id.
+  description: string
+}
+
 interface RunResult {
  model: string
  response: string
@ -232,11 +294,17 @@ const sampleLoading    = ref(false)
 const sampleError      = ref<string | null>(null)
 const rawSample        = ref<Sample | null>(null)
 const editedPrompt     = ref('')
+const systemPrompt     = ref('')
+const imageUrl         = ref('')

 const modelsLoading    = ref(false)
 const allModels        = ref<ModelEntry[]>([])
 const selectedModels   = ref<Set<string>>(new Set())

+const catalogLoading        = ref(false)
+const cfTextCatalog         = ref<CatalogEntry[]>([])
+const selectedCfTextModels  = ref<Set<string>>(new Set())
+
 const temperature      = ref(0.7)

 const running          = ref(false)
@ -261,7 +329,7 @@ const successfulResults = computed(() =>
 // ── Lifecycle ─────────────────────────────────────────────────────────────────

 onMounted(async () => {
-  await Promise.all([loadProducts(), loadModels()])
+  await Promise.all([loadProducts(), loadModels(), loadCfTextCatalog()])
 })

 // ── Methods ────────────────────────────────────────────────────────────────────
@ -298,10 +366,38 @@ async function loadModels() {
  }
 }

+// Load the cf-text catalog from the backend. Any failure (network error,
+// non-2xx status, bad JSON) leaves the catalog empty; the loading flag is
+// always cleared afterwards.
+async function loadCfTextCatalog() {
+  catalogLoading.value = true
+  try {
+    const response = await fetch('/api/imitate/catalog')
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}`)
+    }
+    const payload = await response.json()
+    cfTextCatalog.value = payload.models ?? []
+  } catch {
+    cfTextCatalog.value = []
+  } finally {
+    catalogLoading.value = false
+  }
+}
+
+// Add or remove one cf-text model from the selection. A fresh Set is
+// assigned so the ref's value is replaced rather than mutated in place.
+function toggleCfText(id: string, checked: boolean) {
+  const updated = new Set(selectedCfTextModels.value)
+  if (checked) {
+    updated.add(id)
+  } else {
+    updated.delete(id)
+  }
+  selectedCfTextModels.value = updated
+}
+
+// Select every catalog model when checked, otherwise clear the selection.
+function toggleAllCfText(checked: boolean) {
+  const updated = new Set<string>()
+  if (checked) {
+    for (const entry of cfTextCatalog.value) {
+      updated.add(entry.id)
+    }
+  }
+  selectedCfTextModels.value = updated
+}
+
 async function selectProduct(p: Product) {
  selectedProduct.value = p
  rawSample.value = null
  editedPrompt.value = ''
+  systemPrompt.value = ''
+  imageUrl.value = ''
  sampleError.value = null
  results.value = []
  runLog.value = []
@ -321,6 +417,8 @@ async function fetchSample() {
    const data: Sample = await resp.json()
    rawSample.value = data
    editedPrompt.value = data.prompt
+    systemPrompt.value = data.system_prompt ?? ''
+    imageUrl.value = data.image_url ?? ''
  } catch (err: unknown) {
    sampleError.value = err instanceof Error ? err.message : String(err)
  } finally {
@ -341,7 +439,8 @@ function toggleAllModels(checked: boolean) {
 }

 function startRun() {
-  if (running.value || !editedPrompt.value.trim() || selectedModels.value.size === 0) return
+  const hasModels = selectedModels.value.size > 0 || selectedCfTextModels.value.size > 0
+  if (running.value || !editedPrompt.value.trim() || !hasModels) return

  running.value = true
  results.value = []
@ -351,8 +450,11 @@ function startRun() {
  const params = new URLSearchParams({
    prompt:             editedPrompt.value,
    model_ids:          [...selectedModels.value].join(','),
+    cf_text_model_ids:  [...selectedCfTextModels.value].join(','),
    temperature:        temperature.value.toString(),
    product_id:         selectedProduct.value?.id ?? '',
+    system:             systemPrompt.value,
+    image_url:          imageUrl.value,
  })

  const es = new EventSource(`/api/imitate/run?${params}`)
@ -362,9 +464,13 @@ function startRun() {
    try {
      const msg = JSON.parse(event.data)
      if (msg.type === 'start') {
-        runLog.value.push(`Running ${msg.total_models} model(s)…`)
+        const imgNote = msg.has_image ? ' (with image)' : ''
+        runLog.value.push(`Running ${msg.total_models} model(s)${imgNote}…`)
      } else if (msg.type === 'model_start') {
-        runLog.value.push(`→ ${msg.model}…`)
+        const svc = msg.service === 'cf-text' ? ' [cf-text]' : ''
+        runLog.value.push(`→ ${msg.model}${svc}…`)
+      } else if (msg.type === 'model_coldstart') {
+        runLog.value.push(`  ⏳ ${msg.model}: cold start — waiting for service to load…`)
      } else if (msg.type === 'model_done') {
        const status = msg.error
          ? `✕ error: ${msg.error}`
@ -586,6 +692,46 @@ async function pushCorrections() {
  color: var(--color-text, #1a2338);
 }

+.sample-image-row {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+  flex-wrap: wrap;
+}
+
+.sample-image-thumb {
+  width: 120px;
+  height: 90px;
+  object-fit: cover;
+  border-radius: 0.375rem;
+  border: 1px solid var(--color-border, #d0d7e8);
+  flex-shrink: 0;
+}
+
+.image-badge {
+  font-size: 0.78rem;
+  color: var(--color-text-secondary, #6b7a99);
+}
+
+.system-badge {
+  font-size: 0.68rem;
+  background: color-mix(in srgb, var(--app-primary, #2A6080) 15%, transparent);
+  color: var(--app-primary, #2A6080);
+  border-radius: 9999px;
+  padding: 0.1rem 0.5rem;
+  margin-left: 0.4rem;
+  font-weight: 600;
+  vertical-align: middle;
+}
+
+.system-editor {
+  border-top: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0;
+  border-left: none;
+  border-right: none;
+  border-bottom: none;
+}
+
 .prompt-label {
  font-size: 0.85rem;
  font-weight: 600;
@ -895,4 +1041,15 @@ async function pushCorrections() {

 .msg-ok { color: #065f46; }
 .msg-err { color: #b91c1c; }
+
+.cforch-badge {
+  font-size: 0.68rem;
+  background: color-mix(in srgb, var(--app-accent, #059669) 18%, transparent);
+  color: var(--app-accent, #059669);
+  border-radius: 9999px;
+  padding: 0.1rem 0.5rem;
+  margin-left: 0.4rem;
+  font-weight: 600;
+  vertical-align: middle;
+}
 </style>