feat(imitate): parallel cf-text fanout workers + signal-based cold-start detection
Backend: - Run all cf-text model allocations concurrently via ThreadPoolExecutor + as_completed - Announce model_start events upfront so the UI can show loading states immediately - Replace timer-based startup polling with coordinator state signals: waits for state=="running" (success) or state=="stopped" (fail-fast) on the matching node/gpu instance; falls back to health poll after 6 consecutive probe misses - Add /api/cforch/catalog endpoint: fetches live cf-text model list from cf-orch, filtering out proxy entries (ollama://, vllm://, http://) so only loadable models are returned Frontend (ImitateView.vue): - Show per-model loading spinners as results arrive via SSE stream - Display cold-start badge when coordinator signals the model was freshly loaded
This commit is contained in:
parent
e6b64d6efe
commit
cc24cd0d7d
3 changed files with 462 additions and 30 deletions
|
|
@ -155,6 +155,9 @@ app.include_router(cforch_router, prefix="/api/cforch")
|
||||||
from app.imitate import router as imitate_router
|
from app.imitate import router as imitate_router
|
||||||
app.include_router(imitate_router, prefix="/api/imitate")
|
app.include_router(imitate_router, prefix="/api/imitate")
|
||||||
|
|
||||||
|
from app.style import router as style_router
|
||||||
|
app.include_router(style_router, prefix="/api/style")
|
||||||
|
|
||||||
# In-memory last-action store (single user, local tool — in-memory is fine)
|
# In-memory last-action store (single user, local tool — in-memory is fine)
|
||||||
_last_action: dict | None = None
|
_last_action: dict | None = None
|
||||||
|
|
||||||
|
|
|
||||||
310
app/imitate.py
310
app/imitate.py
|
|
@ -11,6 +11,7 @@ override _CONFIG_DIR and _DATA_DIR via set_config_dir() / set_data_dir() in test
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
@ -21,6 +22,7 @@ from typing import Any
|
||||||
from urllib.error import URLError
|
from urllib.error import URLError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
|
import httpx
|
||||||
import yaml
|
import yaml
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
|
|
@ -87,6 +89,45 @@ def _ollama_url(cfg: dict) -> str:
|
||||||
return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434"
|
return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434"
|
||||||
|
|
||||||
|
|
||||||
|
def _cforch_url() -> str:
|
||||||
|
cforch = _load_cforch_config()
|
||||||
|
return cforch.get("coordinator_url") or "http://localhost:7700"
|
||||||
|
|
||||||
|
|
||||||
|
def _cforch_catalog(cforch_base: str) -> list[dict]:
|
||||||
|
"""Fetch the live cf-text catalog from cf-orch.
|
||||||
|
|
||||||
|
Filters out proxy entries (ollama://, vllm://, http://) — those models are
|
||||||
|
served by their own services and should not be allocated via cf-text.
|
||||||
|
Returns only models with real file-system paths that cf-text can load directly.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
resp = httpx.get(
|
||||||
|
f"{cforch_base}/api/services/cf-text/catalog",
|
||||||
|
params={"node_id": "heimdall"},
|
||||||
|
timeout=5.0,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
raw = resp.json()
|
||||||
|
result = []
|
||||||
|
for model_id, entry in raw.items():
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
path = entry.get("path", "")
|
||||||
|
# Skip proxy entries — they're routed through other services
|
||||||
|
if "://" in path:
|
||||||
|
continue
|
||||||
|
result.append({
|
||||||
|
"id": model_id,
|
||||||
|
"vram_mb": entry.get("vram_mb", 0),
|
||||||
|
"description": entry.get("description", ""),
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Could not fetch cf-orch catalog: %s", exc)
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _http_get_json(url: str, timeout: int = 5) -> Any:
|
def _http_get_json(url: str, timeout: int = 5) -> Any:
|
||||||
"""Fetch JSON from url; raise URLError on failure."""
|
"""Fetch JSON from url; raise URLError on failure."""
|
||||||
req = Request(url, headers={"Accept": "application/json"})
|
req = Request(url, headers={"Accept": "application/json"})
|
||||||
|
|
@ -104,18 +145,29 @@ def _is_online(base_url: str, health_path: str = "/api/health") -> bool:
|
||||||
|
|
||||||
|
|
||||||
def _extract_sample(
|
def _extract_sample(
|
||||||
raw: Any, text_fields: list[str], sample_index: int = 0
|
raw: Any,
|
||||||
|
text_fields: list[str],
|
||||||
|
sample_index: int = 0,
|
||||||
|
sample_key: str | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Pull one item from a list or dict response and extract text_fields."""
|
"""Pull one item from a list or dict response and extract text_fields.
|
||||||
|
|
||||||
|
sample_key: if provided, unwrap raw[sample_key] before looking for a list.
|
||||||
|
Falls back to a set of conventional envelope keys if sample_key is absent.
|
||||||
|
"""
|
||||||
item: dict[str, Any]
|
item: dict[str, Any]
|
||||||
if isinstance(raw, list):
|
if isinstance(raw, list):
|
||||||
if not raw:
|
if not raw:
|
||||||
return {}
|
return {}
|
||||||
item = raw[min(sample_index, len(raw) - 1)]
|
item = raw[min(sample_index, len(raw) - 1)]
|
||||||
elif isinstance(raw, dict):
|
elif isinstance(raw, dict):
|
||||||
# may be {items: [...]} or the item itself
|
# Use declared sample_key first, then fall back to conventional names.
|
||||||
for key in ("items", "results", "data", "jobs", "listings", "pantry",
|
_ENVELOPE_KEYS = (
|
||||||
"saved_searches", "entries", "calls", "records"):
|
"samples", "items", "results", "data", "jobs", "listings",
|
||||||
|
"pantry", "saved_searches", "entries", "calls", "records",
|
||||||
|
)
|
||||||
|
search_keys = ([sample_key] if sample_key else []) + list(_ENVELOPE_KEYS)
|
||||||
|
for key in search_keys:
|
||||||
if key in raw and isinstance(raw[key], list):
|
if key in raw and isinstance(raw[key], list):
|
||||||
lst = raw[key]
|
lst = raw[key]
|
||||||
item = lst[min(sample_index, len(lst) - 1)] if lst else {}
|
item = lst[min(sample_index, len(lst) - 1)] if lst else {}
|
||||||
|
|
@ -141,24 +193,49 @@ def _sse(data: dict) -> str:
|
||||||
return f"data: {json.dumps(data)}\n\n"
|
return f"data: {json.dumps(data)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_image_b64(image_url: str) -> str:
|
||||||
|
"""Download an image URL and return it as a base64 string for ollama.
|
||||||
|
|
||||||
|
Returns empty string on any failure — a missing image is non-fatal;
|
||||||
|
the model will still run against the text prompt alone.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
req = Request(image_url, headers={"User-Agent": "Avocet/1.0"})
|
||||||
|
with urlopen(req, timeout=10) as resp:
|
||||||
|
return base64.b64encode(resp.read()).decode("ascii")
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Failed to fetch image %s: %s", image_url, exc)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def _run_ollama_streaming(
|
def _run_ollama_streaming(
|
||||||
ollama_base: str,
|
ollama_base: str,
|
||||||
model_id: str,
|
model_id: str,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
temperature: float,
|
temperature: float,
|
||||||
|
system: str = "",
|
||||||
|
images: list[str] | None = None,
|
||||||
) -> tuple[str, int]:
|
) -> tuple[str, int]:
|
||||||
"""Call ollama /api/generate with stream=True; return (full_response, elapsed_ms).
|
"""Call ollama /api/generate with stream=False; return (full_response, elapsed_ms).
|
||||||
|
|
||||||
Blocks until the model finishes; yields nothing — streaming is handled by
|
Blocks until the model finishes; yields nothing — streaming is handled by
|
||||||
the SSE generator in run_imitate().
|
the SSE generator in run_imitate().
|
||||||
|
|
||||||
|
system: optional system prompt passed as a separate field to ollama.
|
||||||
|
images: list of base64-encoded image strings (vision models only).
|
||||||
"""
|
"""
|
||||||
url = f"{ollama_base.rstrip('/')}/api/generate"
|
url = f"{ollama_base.rstrip('/')}/api/generate"
|
||||||
payload = json.dumps({
|
body: dict = {
|
||||||
"model": model_id,
|
"model": model_id,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"options": {"temperature": temperature},
|
"options": {"temperature": temperature},
|
||||||
}).encode("utf-8")
|
}
|
||||||
|
if system:
|
||||||
|
body["system"] = system
|
||||||
|
if images:
|
||||||
|
body["images"] = images
|
||||||
|
payload = json.dumps(body).encode("utf-8")
|
||||||
req = Request(url, data=payload, method="POST",
|
req = Request(url, data=payload, method="POST",
|
||||||
headers={"Content-Type": "application/json"})
|
headers={"Content-Type": "application/json"})
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
@ -172,6 +249,122 @@ def _run_ollama_streaming(
|
||||||
raise RuntimeError(str(exc)) from exc
|
raise RuntimeError(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
|
def _run_cftext(
|
||||||
|
cforch_base: str,
|
||||||
|
model_id: str,
|
||||||
|
prompt: str,
|
||||||
|
system: str,
|
||||||
|
temperature: float,
|
||||||
|
startup_timeout_s: float = 180.0,
|
||||||
|
) -> tuple[str, int, bool]:
|
||||||
|
"""Allocate cf-text via cf-orch, generate, release. Returns (response, elapsed_ms, cold_started).
|
||||||
|
|
||||||
|
Raises RuntimeError on allocation failure or generation error.
|
||||||
|
cold_started=True means the service was launched from scratch (caller may log this).
|
||||||
|
|
||||||
|
Cold-start detection uses coordinator state signals (running/stopped) rather than
|
||||||
|
polling the service health endpoint — this fails fast on model load errors instead
|
||||||
|
of waiting out the full timeout.
|
||||||
|
"""
|
||||||
|
# Allocate
|
||||||
|
alloc_resp = httpx.post(
|
||||||
|
f"{cforch_base}/api/services/cf-text/allocate",
|
||||||
|
json={
|
||||||
|
"model_candidates": [model_id],
|
||||||
|
"caller": "avocet",
|
||||||
|
"pipeline": "imitate",
|
||||||
|
},
|
||||||
|
timeout=30.0,
|
||||||
|
)
|
||||||
|
alloc_resp.raise_for_status()
|
||||||
|
data = alloc_resp.json()
|
||||||
|
service_url: str = data["url"]
|
||||||
|
allocation_id: str = data.get("allocation_id", "")
|
||||||
|
node_id: str = data.get("node_id", "")
|
||||||
|
gpu_id: int | None = data.get("gpu_id")
|
||||||
|
cold_started = data.get("started", False) and not data.get("warm", True)
|
||||||
|
|
||||||
|
# Wait for ready using coordinator state signals
|
||||||
|
if cold_started:
|
||||||
|
deadline = time.monotonic() + startup_timeout_s
|
||||||
|
probe_misses = 0
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
try:
|
||||||
|
status = httpx.get(
|
||||||
|
f"{cforch_base}/api/services/cf-text/status", timeout=5.0
|
||||||
|
)
|
||||||
|
if status.is_success:
|
||||||
|
instances = status.json().get("instances", [])
|
||||||
|
match = next(
|
||||||
|
(i for i in instances
|
||||||
|
if i.get("node_id") == node_id and i.get("gpu_id") == gpu_id),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if match:
|
||||||
|
probe_misses = 0
|
||||||
|
state = match.get("state", "")
|
||||||
|
if state == "running":
|
||||||
|
break
|
||||||
|
elif state == "stopped":
|
||||||
|
if allocation_id:
|
||||||
|
httpx.delete(
|
||||||
|
f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}",
|
||||||
|
timeout=5.0,
|
||||||
|
)
|
||||||
|
raise RuntimeError(f"cf-text failed to load {model_id!r} (service stopped)")
|
||||||
|
else:
|
||||||
|
probe_misses += 1
|
||||||
|
if probe_misses >= 6:
|
||||||
|
# Coordinator hasn't registered instance yet — fall back to health poll
|
||||||
|
try:
|
||||||
|
if httpx.get(f"{service_url}/health", timeout=3.0).is_success:
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
except RuntimeError:
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
time.sleep(2.0)
|
||||||
|
else:
|
||||||
|
if allocation_id:
|
||||||
|
httpx.delete(f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}", timeout=5.0)
|
||||||
|
raise RuntimeError(f"cf-text cold start timed out after {startup_timeout_s:.0f}s")
|
||||||
|
|
||||||
|
# Generate
|
||||||
|
messages: list[dict] = []
|
||||||
|
if system:
|
||||||
|
messages.append({"role": "system", "content": system})
|
||||||
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
|
t0 = time.time()
|
||||||
|
try:
|
||||||
|
gen_resp = httpx.post(
|
||||||
|
f"{service_url}/v1/chat/completions",
|
||||||
|
json={
|
||||||
|
"model": model_id,
|
||||||
|
"messages": messages,
|
||||||
|
"max_tokens": 300,
|
||||||
|
"temperature": temperature,
|
||||||
|
"stream": False,
|
||||||
|
},
|
||||||
|
timeout=120.0,
|
||||||
|
)
|
||||||
|
gen_resp.raise_for_status()
|
||||||
|
elapsed_ms = int((time.time() - t0) * 1000)
|
||||||
|
content = gen_resp.json()["choices"][0]["message"]["content"]
|
||||||
|
return content.strip(), elapsed_ms, cold_started
|
||||||
|
except Exception as exc:
|
||||||
|
elapsed_ms = int((time.time() - t0) * 1000)
|
||||||
|
raise RuntimeError(str(exc)) from exc
|
||||||
|
finally:
|
||||||
|
if allocation_id:
|
||||||
|
try:
|
||||||
|
httpx.delete(f"{cforch_base}/api/services/cf-text/allocations/{allocation_id}", timeout=5.0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# ── GET /products ──────────────────────────────────────────────────────────────
|
# ── GET /products ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@router.get("/products")
|
@router.get("/products")
|
||||||
|
|
@ -226,52 +419,96 @@ def get_sample(product_id: str, index: int = 0) -> dict:
|
||||||
raise HTTPException(502, f"Bad response from product API: {exc}") from exc
|
raise HTTPException(502, f"Bad response from product API: {exc}") from exc
|
||||||
|
|
||||||
text_fields = product.get("text_fields", []) or []
|
text_fields = product.get("text_fields", []) or []
|
||||||
extracted = _extract_sample(raw, text_fields, index)
|
sample_key = product.get("sample_key") or None
|
||||||
|
extracted = _extract_sample(raw, text_fields, index, sample_key=sample_key)
|
||||||
if not extracted:
|
if not extracted:
|
||||||
raise HTTPException(404, "No sample items returned by product API")
|
raise HTTPException(404, "No sample items returned by product API")
|
||||||
|
|
||||||
prompt_template = product.get("prompt_template", "{text}")
|
prompt_template = product.get("prompt_template", "{text}")
|
||||||
prompt = prompt_template.replace("{text}", extracted["text"])
|
prompt = prompt_template.replace("{text}", extracted["text"])
|
||||||
|
# Also substitute any {field_name} placeholders from the raw item fields.
|
||||||
|
item = extracted.get("item", {})
|
||||||
|
for field, val in item.items():
|
||||||
|
prompt = prompt.replace(f"{{{field}}}", str(val) if val is not None else "")
|
||||||
|
|
||||||
|
# Expose system_prompt and image_url if the product API returns them.
|
||||||
|
# system_prompt: Peregrine, Snipe (vision analysis instructions)
|
||||||
|
# image_url: Snipe listing photos — Avocet downloads + base64-encodes at run time
|
||||||
|
item = extracted.get("item", {})
|
||||||
|
system_prompt = str(item.get("system_prompt", "")) if isinstance(item, dict) else ""
|
||||||
|
image_url = str(item.get("image_url", "")) if isinstance(item, dict) else ""
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"product_id": product_id,
|
"product_id": product_id,
|
||||||
"sample_index": index,
|
"sample_index": index,
|
||||||
"text": extracted["text"],
|
"text": extracted["text"],
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"raw_item": extracted.get("item", {}),
|
"system_prompt": system_prompt,
|
||||||
|
"image_url": image_url,
|
||||||
|
"raw_item": item,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── GET /catalog ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@router.get("/catalog")
|
||||||
|
def get_catalog() -> dict:
|
||||||
|
"""Return the live cf-text model catalog from cf-orch coordinator."""
|
||||||
|
models = _cforch_catalog(_cforch_url())
|
||||||
|
return {"models": models}
|
||||||
|
|
||||||
|
|
||||||
# ── GET /run (SSE) ─────────────────────────────────────────────────────────────
|
# ── GET /run (SSE) ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@router.get("/run")
|
@router.get("/run")
|
||||||
def run_imitate(
|
def run_imitate(
|
||||||
prompt: str = "",
|
prompt: str = "",
|
||||||
model_ids: str = "", # comma-separated ollama model IDs
|
model_ids: str = "", # comma-separated ollama model IDs
|
||||||
|
cf_text_model_ids: str = "", # comma-separated cf-text model IDs (via cf-orch)
|
||||||
temperature: float = 0.7,
|
temperature: float = 0.7,
|
||||||
product_id: str = "",
|
product_id: str = "",
|
||||||
|
system: str = "", # optional system prompt
|
||||||
|
image_url: str = "", # optional image URL for vision models
|
||||||
) -> StreamingResponse:
|
) -> StreamingResponse:
|
||||||
"""Run a prompt through selected ollama models and stream results as SSE."""
|
"""Run a prompt through selected ollama models and stream results as SSE.
|
||||||
|
|
||||||
|
If image_url is provided, the image is downloaded once and passed to every
|
||||||
|
model as a base64-encoded blob — allowing vision-capable local models to
|
||||||
|
evaluate listing photos the same way Snipe's background task pipeline does.
|
||||||
|
"""
|
||||||
|
|
||||||
if not prompt.strip():
|
if not prompt.strip():
|
||||||
raise HTTPException(422, "prompt is required")
|
raise HTTPException(422, "prompt is required")
|
||||||
|
|
||||||
ids = [m.strip() for m in model_ids.split(",") if m.strip()]
|
ollama_ids = [m.strip() for m in model_ids.split(",") if m.strip()]
|
||||||
if not ids:
|
cftext_ids = [m.strip() for m in cf_text_model_ids.split(",") if m.strip()]
|
||||||
raise HTTPException(422, "model_ids is required")
|
if not ollama_ids and not cftext_ids:
|
||||||
|
raise HTTPException(422, "model_ids or cf_text_model_ids is required")
|
||||||
|
|
||||||
cfg = _load_imitate_config()
|
cfg = _load_imitate_config()
|
||||||
ollama_base = _ollama_url(cfg)
|
ollama_base = _ollama_url(cfg)
|
||||||
|
cforch_base = _cforch_url()
|
||||||
|
system_ctx = system.strip() or ""
|
||||||
|
total_models = len(ollama_ids) + len(cftext_ids)
|
||||||
|
|
||||||
|
# Download image once before streaming — shared across ollama vision models
|
||||||
|
images: list[str] = []
|
||||||
|
if image_url.strip():
|
||||||
|
b64 = _fetch_image_b64(image_url.strip())
|
||||||
|
if b64:
|
||||||
|
images = [b64]
|
||||||
|
|
||||||
def generate():
|
def generate():
|
||||||
results: list[dict] = []
|
results: list[dict] = []
|
||||||
yield _sse({"type": "start", "total_models": len(ids)})
|
yield _sse({"type": "start", "total_models": total_models, "has_image": bool(images)})
|
||||||
|
|
||||||
for model_id in ids:
|
# Ollama models
|
||||||
yield _sse({"type": "model_start", "model": model_id})
|
for model_id in ollama_ids:
|
||||||
|
yield _sse({"type": "model_start", "model": model_id, "service": "ollama"})
|
||||||
try:
|
try:
|
||||||
response, elapsed_ms = _run_ollama_streaming(
|
response, elapsed_ms = _run_ollama_streaming(
|
||||||
ollama_base, model_id, prompt, temperature
|
ollama_base, model_id, prompt, temperature,
|
||||||
|
system=system_ctx, images=images or None,
|
||||||
)
|
)
|
||||||
result = {
|
result = {
|
||||||
"model": model_id,
|
"model": model_id,
|
||||||
|
|
@ -289,6 +526,41 @@ def run_imitate(
|
||||||
results.append(result)
|
results.append(result)
|
||||||
yield _sse({"type": "model_done", **result})
|
yield _sse({"type": "model_done", **result})
|
||||||
|
|
||||||
|
# cf-text models via cf-orch — fan out in parallel when multiple models selected
|
||||||
|
if cftext_ids:
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
|
# Announce all models upfront so the UI can show loading states immediately
|
||||||
|
for model_id in cftext_ids:
|
||||||
|
yield _sse({"type": "model_start", "model": model_id, "service": "cf-text"})
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=len(cftext_ids)) as pool:
|
||||||
|
future_to_model = {
|
||||||
|
pool.submit(_run_cftext, cforch_base, mid, prompt, system_ctx, temperature): mid
|
||||||
|
for mid in cftext_ids
|
||||||
|
}
|
||||||
|
for future in as_completed(future_to_model):
|
||||||
|
model_id = future_to_model[future]
|
||||||
|
try:
|
||||||
|
response, elapsed_ms, cold_started = future.result()
|
||||||
|
if cold_started:
|
||||||
|
yield _sse({"type": "model_coldstart", "model": model_id})
|
||||||
|
result = {
|
||||||
|
"model": model_id,
|
||||||
|
"response": response,
|
||||||
|
"elapsed_ms": elapsed_ms,
|
||||||
|
"error": None,
|
||||||
|
}
|
||||||
|
except Exception as exc:
|
||||||
|
result = {
|
||||||
|
"model": model_id,
|
||||||
|
"response": "",
|
||||||
|
"elapsed_ms": 0,
|
||||||
|
"error": str(exc),
|
||||||
|
}
|
||||||
|
results.append(result)
|
||||||
|
yield _sse({"type": "model_done", **result})
|
||||||
|
|
||||||
yield _sse({"type": "complete", "results": results})
|
yield _sse({"type": "complete", "results": results})
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
|
|
|
||||||
|
|
@ -49,12 +49,30 @@
|
||||||
<div v-if="sampleLoading" class="picker-loading">Fetching sample from API…</div>
|
<div v-if="sampleLoading" class="picker-loading">Fetching sample from API…</div>
|
||||||
|
|
||||||
<template v-else-if="rawSample">
|
<template v-else-if="rawSample">
|
||||||
<!-- Fetched text preview -->
|
<!-- Listing image thumbnail (Snipe vision samples) -->
|
||||||
<details class="sample-preview" open>
|
<div v-if="imageUrl" class="sample-image-row">
|
||||||
|
<img :src="imageUrl" class="sample-image-thumb" alt="Listing photo" @error="imageUrl = ''" />
|
||||||
|
<span class="image-badge">📷 image will be sent to vision models</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Fetched text preview (hidden when prompt_template is {input_text} with no text_fields) -->
|
||||||
|
<details v-if="rawSample.text" class="sample-preview" open>
|
||||||
<summary class="sample-preview-toggle">Raw sample text</summary>
|
<summary class="sample-preview-toggle">Raw sample text</summary>
|
||||||
<pre class="sample-text">{{ rawSample.text }}</pre>
|
<pre class="sample-text">{{ rawSample.text }}</pre>
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
<!-- System context (shown only when the product provides one) -->
|
||||||
|
<template v-if="systemPrompt">
|
||||||
|
<details class="sample-preview">
|
||||||
|
<summary class="sample-preview-toggle">System context <span class="system-badge">sent separately to model</span></summary>
|
||||||
|
<textarea
|
||||||
|
class="prompt-editor system-editor"
|
||||||
|
v-model="systemPrompt"
|
||||||
|
rows="4"
|
||||||
|
/>
|
||||||
|
</details>
|
||||||
|
</template>
|
||||||
|
|
||||||
<!-- Prompt editor -->
|
<!-- Prompt editor -->
|
||||||
<label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
|
<label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
|
||||||
<textarea
|
<textarea
|
||||||
|
|
@ -112,6 +130,42 @@
|
||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
<!-- cf-text model picker (live catalog from cf-orch) -->
|
||||||
|
<details class="model-picker">
|
||||||
|
<summary class="picker-summary">
|
||||||
|
<span class="picker-title">⚡ cf-text Models <span class="cforch-badge">via cf-orch</span></span>
|
||||||
|
<span class="picker-badge">{{ selectedCfTextModels.size }} / {{ cfTextCatalog.length }}</span>
|
||||||
|
</summary>
|
||||||
|
<div class="picker-body">
|
||||||
|
<div v-if="catalogLoading" class="picker-loading">Loading catalog from cf-orch…</div>
|
||||||
|
<div v-else-if="cfTextCatalog.length === 0" class="picker-empty">
|
||||||
|
No cf-text models available — check cf-orch coordinator is running.
|
||||||
|
</div>
|
||||||
|
<template v-else>
|
||||||
|
<label class="picker-cat-header">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
:checked="selectedCfTextModels.size === cfTextCatalog.length"
|
||||||
|
:indeterminate="selectedCfTextModels.size > 0 && selectedCfTextModels.size < cfTextCatalog.length"
|
||||||
|
@change="toggleAllCfText(($event.target as HTMLInputElement).checked)"
|
||||||
|
/>
|
||||||
|
<span class="picker-cat-name">All cf-text models</span>
|
||||||
|
</label>
|
||||||
|
<div class="picker-model-list">
|
||||||
|
<label v-for="m in cfTextCatalog" :key="m.id" class="picker-model-row">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
:checked="selectedCfTextModels.has(m.id)"
|
||||||
|
@change="toggleCfText(m.id, ($event.target as HTMLInputElement).checked)"
|
||||||
|
/>
|
||||||
|
<span class="picker-model-name" :title="m.description || m.id">{{ m.id }}</span>
|
||||||
|
<span v-if="m.vram_mb" class="tag">{{ Math.round(m.vram_mb / 1024 * 10) / 10 }}GB</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
|
||||||
<!-- Temperature -->
|
<!-- Temperature -->
|
||||||
<div class="temp-row">
|
<div class="temp-row">
|
||||||
<label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
|
<label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
|
||||||
|
|
@ -128,7 +182,7 @@
|
||||||
<div class="run-row">
|
<div class="run-row">
|
||||||
<button
|
<button
|
||||||
class="btn-run"
|
class="btn-run"
|
||||||
:disabled="running || selectedModels.size === 0"
|
:disabled="running || (selectedModels.size === 0 && selectedCfTextModels.size === 0)"
|
||||||
@click="startRun"
|
@click="startRun"
|
||||||
>
|
>
|
||||||
{{ running ? '⏳ Running…' : '▶ Run' }}
|
{{ running ? '⏳ Running…' : '▶ Run' }}
|
||||||
|
|
@ -204,6 +258,8 @@ interface Sample {
|
||||||
sample_index: number
|
sample_index: number
|
||||||
text: string
|
text: string
|
||||||
prompt: string
|
prompt: string
|
||||||
|
system_prompt: string
|
||||||
|
image_url: string
|
||||||
raw_item: Record<string, unknown>
|
raw_item: Record<string, unknown>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -215,6 +271,12 @@ interface ModelEntry {
|
||||||
vram_estimate_mb: number
|
vram_estimate_mb: number
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface CatalogEntry {
|
||||||
|
id: string
|
||||||
|
vram_mb: number
|
||||||
|
description: string
|
||||||
|
}
|
||||||
|
|
||||||
interface RunResult {
|
interface RunResult {
|
||||||
model: string
|
model: string
|
||||||
response: string
|
response: string
|
||||||
|
|
@ -232,11 +294,17 @@ const sampleLoading = ref(false)
|
||||||
const sampleError = ref<string | null>(null)
|
const sampleError = ref<string | null>(null)
|
||||||
const rawSample = ref<Sample | null>(null)
|
const rawSample = ref<Sample | null>(null)
|
||||||
const editedPrompt = ref('')
|
const editedPrompt = ref('')
|
||||||
|
const systemPrompt = ref('')
|
||||||
|
const imageUrl = ref('')
|
||||||
|
|
||||||
const modelsLoading = ref(false)
|
const modelsLoading = ref(false)
|
||||||
const allModels = ref<ModelEntry[]>([])
|
const allModels = ref<ModelEntry[]>([])
|
||||||
const selectedModels = ref<Set<string>>(new Set())
|
const selectedModels = ref<Set<string>>(new Set())
|
||||||
|
|
||||||
|
const catalogLoading = ref(false)
|
||||||
|
const cfTextCatalog = ref<CatalogEntry[]>([])
|
||||||
|
const selectedCfTextModels = ref<Set<string>>(new Set())
|
||||||
|
|
||||||
const temperature = ref(0.7)
|
const temperature = ref(0.7)
|
||||||
|
|
||||||
const running = ref(false)
|
const running = ref(false)
|
||||||
|
|
@ -261,7 +329,7 @@ const successfulResults = computed(() =>
|
||||||
// ── Lifecycle ─────────────────────────────────────────────────────────────────
|
// ── Lifecycle ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
onMounted(async () => {
|
onMounted(async () => {
|
||||||
await Promise.all([loadProducts(), loadModels()])
|
await Promise.all([loadProducts(), loadModels(), loadCfTextCatalog()])
|
||||||
})
|
})
|
||||||
|
|
||||||
// ── Methods ────────────────────────────────────────────────────────────────────
|
// ── Methods ────────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -298,10 +366,38 @@ async function loadModels() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function loadCfTextCatalog() {
|
||||||
|
catalogLoading.value = true
|
||||||
|
try {
|
||||||
|
const resp = await fetch('/api/imitate/catalog')
|
||||||
|
if (!resp.ok) throw new Error(`HTTP ${resp.status}`)
|
||||||
|
const data = await resp.json()
|
||||||
|
cfTextCatalog.value = data.models ?? []
|
||||||
|
} catch {
|
||||||
|
cfTextCatalog.value = []
|
||||||
|
} finally {
|
||||||
|
catalogLoading.value = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function toggleCfText(id: string, checked: boolean) {
|
||||||
|
const next = new Set(selectedCfTextModels.value)
|
||||||
|
checked ? next.add(id) : next.delete(id)
|
||||||
|
selectedCfTextModels.value = next
|
||||||
|
}
|
||||||
|
|
||||||
|
function toggleAllCfText(checked: boolean) {
|
||||||
|
selectedCfTextModels.value = checked
|
||||||
|
? new Set(cfTextCatalog.value.map(m => m.id))
|
||||||
|
: new Set()
|
||||||
|
}
|
||||||
|
|
||||||
async function selectProduct(p: Product) {
|
async function selectProduct(p: Product) {
|
||||||
selectedProduct.value = p
|
selectedProduct.value = p
|
||||||
rawSample.value = null
|
rawSample.value = null
|
||||||
editedPrompt.value = ''
|
editedPrompt.value = ''
|
||||||
|
systemPrompt.value = ''
|
||||||
|
imageUrl.value = ''
|
||||||
sampleError.value = null
|
sampleError.value = null
|
||||||
results.value = []
|
results.value = []
|
||||||
runLog.value = []
|
runLog.value = []
|
||||||
|
|
@ -321,6 +417,8 @@ async function fetchSample() {
|
||||||
const data: Sample = await resp.json()
|
const data: Sample = await resp.json()
|
||||||
rawSample.value = data
|
rawSample.value = data
|
||||||
editedPrompt.value = data.prompt
|
editedPrompt.value = data.prompt
|
||||||
|
systemPrompt.value = data.system_prompt ?? ''
|
||||||
|
imageUrl.value = data.image_url ?? ''
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
sampleError.value = err instanceof Error ? err.message : String(err)
|
sampleError.value = err instanceof Error ? err.message : String(err)
|
||||||
} finally {
|
} finally {
|
||||||
|
|
@ -341,7 +439,8 @@ function toggleAllModels(checked: boolean) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function startRun() {
|
function startRun() {
|
||||||
if (running.value || !editedPrompt.value.trim() || selectedModels.value.size === 0) return
|
const hasModels = selectedModels.value.size > 0 || selectedCfTextModels.value.size > 0
|
||||||
|
if (running.value || !editedPrompt.value.trim() || !hasModels) return
|
||||||
|
|
||||||
running.value = true
|
running.value = true
|
||||||
results.value = []
|
results.value = []
|
||||||
|
|
@ -349,10 +448,13 @@ function startRun() {
|
||||||
correctionsPushMsg.value = null
|
correctionsPushMsg.value = null
|
||||||
|
|
||||||
const params = new URLSearchParams({
|
const params = new URLSearchParams({
|
||||||
prompt: editedPrompt.value,
|
prompt: editedPrompt.value,
|
||||||
model_ids: [...selectedModels.value].join(','),
|
model_ids: [...selectedModels.value].join(','),
|
||||||
temperature: temperature.value.toString(),
|
cf_text_model_ids: [...selectedCfTextModels.value].join(','),
|
||||||
product_id: selectedProduct.value?.id ?? '',
|
temperature: temperature.value.toString(),
|
||||||
|
product_id: selectedProduct.value?.id ?? '',
|
||||||
|
system: systemPrompt.value,
|
||||||
|
image_url: imageUrl.value,
|
||||||
})
|
})
|
||||||
|
|
||||||
const es = new EventSource(`/api/imitate/run?${params}`)
|
const es = new EventSource(`/api/imitate/run?${params}`)
|
||||||
|
|
@ -362,9 +464,13 @@ function startRun() {
|
||||||
try {
|
try {
|
||||||
const msg = JSON.parse(event.data)
|
const msg = JSON.parse(event.data)
|
||||||
if (msg.type === 'start') {
|
if (msg.type === 'start') {
|
||||||
runLog.value.push(`Running ${msg.total_models} model(s)…`)
|
const imgNote = msg.has_image ? ' (with image)' : ''
|
||||||
|
runLog.value.push(`Running ${msg.total_models} model(s)${imgNote}…`)
|
||||||
} else if (msg.type === 'model_start') {
|
} else if (msg.type === 'model_start') {
|
||||||
runLog.value.push(`→ ${msg.model}…`)
|
const svc = msg.service === 'cf-text' ? ' [cf-text]' : ''
|
||||||
|
runLog.value.push(`→ ${msg.model}${svc}…`)
|
||||||
|
} else if (msg.type === 'model_coldstart') {
|
||||||
|
runLog.value.push(` ⏳ ${msg.model}: cold start — waiting for service to load…`)
|
||||||
} else if (msg.type === 'model_done') {
|
} else if (msg.type === 'model_done') {
|
||||||
const status = msg.error
|
const status = msg.error
|
||||||
? `✕ error: ${msg.error}`
|
? `✕ error: ${msg.error}`
|
||||||
|
|
@ -586,6 +692,46 @@ async function pushCorrections() {
|
||||||
color: var(--color-text, #1a2338);
|
color: var(--color-text, #1a2338);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.sample-image-row {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sample-image-thumb {
|
||||||
|
width: 120px;
|
||||||
|
height: 90px;
|
||||||
|
object-fit: cover;
|
||||||
|
border-radius: 0.375rem;
|
||||||
|
border: 1px solid var(--color-border, #d0d7e8);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.image-badge {
|
||||||
|
font-size: 0.78rem;
|
||||||
|
color: var(--color-text-secondary, #6b7a99);
|
||||||
|
}
|
||||||
|
|
||||||
|
.system-badge {
|
||||||
|
font-size: 0.68rem;
|
||||||
|
background: color-mix(in srgb, var(--app-primary, #2A6080) 15%, transparent);
|
||||||
|
color: var(--app-primary, #2A6080);
|
||||||
|
border-radius: 9999px;
|
||||||
|
padding: 0.1rem 0.5rem;
|
||||||
|
margin-left: 0.4rem;
|
||||||
|
font-weight: 600;
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
|
|
||||||
|
.system-editor {
|
||||||
|
border-top: 1px solid var(--color-border, #d0d7e8);
|
||||||
|
border-radius: 0;
|
||||||
|
border-left: none;
|
||||||
|
border-right: none;
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
.prompt-label {
|
.prompt-label {
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
font-weight: 600;
|
font-weight: 600;
|
||||||
|
|
@ -895,4 +1041,15 @@ async function pushCorrections() {
|
||||||
|
|
||||||
.msg-ok { color: #065f46; }
|
.msg-ok { color: #065f46; }
|
||||||
.msg-err { color: #b91c1c; }
|
.msg-err { color: #b91c1c; }
|
||||||
|
|
||||||
|
.cforch-badge {
|
||||||
|
font-size: 0.68rem;
|
||||||
|
background: color-mix(in srgb, var(--app-accent, #059669) 18%, transparent);
|
||||||
|
color: var(--app-accent, #059669);
|
||||||
|
border-radius: 9999px;
|
||||||
|
padding: 0.1rem 0.5rem;
|
||||||
|
margin-left: 0.4rem;
|
||||||
|
font-weight: 600;
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue